From ea7b74389f6c3afab941fd4e593ec3c111758921 Mon Sep 17 00:00:00 2001 From: reger-men Date: Fri, 9 Jul 2021 03:50:40 +0000 Subject: [PATCH 01/22] First draft HIP Version --- OpenCL/inc_common.cl | 20 +- OpenCL/inc_common.h | 4 +- OpenCL/inc_platform.cl | 6 +- OpenCL/inc_platform.h | 6 +- OpenCL/inc_types.h | 7 +- OpenCL/inc_vendor.h | 16 +- OpenCL/m01700_a0-optimized.cl | 2 +- OpenCL/m01700_a1-optimized.cl | 2 +- OpenCL/m01700_a3-optimized.cl | 2 +- OpenCL/m01710_a0-optimized.cl | 2 +- OpenCL/m01710_a1-optimized.cl | 2 +- OpenCL/m01710_a3-optimized.cl | 2 +- OpenCL/m01720_a0-optimized.cl | 2 +- OpenCL/m01720_a1-optimized.cl | 2 +- OpenCL/m01720_a3-optimized.cl | 2 +- OpenCL/m01730_a0-optimized.cl | 2 +- OpenCL/m01730_a1-optimized.cl | 2 +- OpenCL/m01730_a3-optimized.cl | 2 +- OpenCL/m01740_a0-optimized.cl | 2 +- OpenCL/m01740_a1-optimized.cl | 2 +- OpenCL/m01740_a3-optimized.cl | 2 +- OpenCL/m02500-pure.cl | 2 +- OpenCL/m08000_a0-optimized.cl | 4 +- OpenCL/m08000_a1-optimized.cl | 4 +- OpenCL/m08000_a3-optimized.cl | 4 +- OpenCL/m08900-pure.cl | 10 +- OpenCL/m10800_a0-optimized.cl | 2 +- OpenCL/m10800_a1-optimized.cl | 2 +- OpenCL/m10800_a3-optimized.cl | 2 +- OpenCL/m15700-pure.cl | 10 +- OpenCL/m21000_a0-optimized.cl | 2 +- OpenCL/m21000_a1-optimized.cl | 4 +- OpenCL/m21000_a3-optimized.cl | 4 +- OpenCL/m22000-pure.cl | 2 +- OpenCL/m22001-pure.cl | 2 +- OpenCL/m22200_a0-optimized.cl | 2 +- OpenCL/m22200_a1-optimized.cl | 2 +- OpenCL/m22200_a3-optimized.cl | 2 +- OpenCL/m22700-pure.cl | 10 +- include/backend.h | 60 + include/ext_hip.h | 1131 ++++++++++ include/ext_hiprtc.h | 87 + include/types.h | 96 + src/Makefile | 4 +- src/backend.c | 3717 ++++++++++++++++++++++++++++++++- src/ext_hip.c | 8 + src/ext_hiprtc.c | 27 + src/selftest.c | 5 +- src/terminal.c | 53 + src/user_options.c | 3 + 50 files changed, 5255 insertions(+), 95 deletions(-) create mode 100644 include/ext_hip.h create mode 100644 include/ext_hiprtc.h create mode 100644 src/ext_hip.c create mode 
100644 src/ext_hiprtc.c diff --git a/OpenCL/inc_common.cl b/OpenCL/inc_common.cl index ee008e083..51b83dd54 100644 --- a/OpenCL/inc_common.cl +++ b/OpenCL/inc_common.cl @@ -3,6 +3,10 @@ * License.....: MIT */ +#ifdef IS_HIP +#include +#endif + #include "inc_vendor.h" #include "inc_types.h" #include "inc_platform.h" @@ -879,7 +883,7 @@ DECLSPEC u32x hc_rotl32 (const u32x a, const int n) { #if defined _CPU_OPENCL_EMU_H return rotl32 (a, n); - #elif defined IS_CUDA + #elif defined IS_CUDA || defined IS_HIP return rotl32 (a, n); #else #ifdef USE_ROTATE @@ -894,7 +898,7 @@ DECLSPEC u32x hc_rotr32 (const u32x a, const int n) { #if defined _CPU_OPENCL_EMU_H return rotr32 (a, n); - #elif defined IS_CUDA + #elif defined IS_CUDA || defined IS_HIP return rotr32 (a, n); #else #ifdef USE_ROTATE @@ -909,7 +913,7 @@ DECLSPEC u32 hc_rotl32_S (const u32 a, const int n) { #if defined _CPU_OPENCL_EMU_H return rotl32 (a, n); - #elif defined IS_CUDA + #elif defined IS_CUDA || defined IS_HIP return rotl32_S (a, n); #else #ifdef USE_ROTATE @@ -924,7 +928,7 @@ DECLSPEC u32 hc_rotr32_S (const u32 a, const int n) { #if defined _CPU_OPENCL_EMU_H return rotr32 (a, n); - #elif defined IS_CUDA + #elif defined IS_CUDA || defined IS_HIP return rotr32_S (a, n); #else #ifdef USE_ROTATE @@ -939,7 +943,7 @@ DECLSPEC u64x hc_rotl64 (const u64x a, const int n) { #if defined _CPU_OPENCL_EMU_H return rotl64 (a, n); - #elif defined IS_CUDA + #elif defined IS_CUDA || defined IS_HIP return rotl64 (a, n); #elif defined IS_AMD return rotl64 (a, n); @@ -956,7 +960,7 @@ DECLSPEC u64x hc_rotr64 (const u64x a, const int n) { #if defined _CPU_OPENCL_EMU_H return rotr64 (a, n); - #elif defined IS_CUDA + #elif defined IS_CUDA || defined IS_HIP return rotr64 (a, n); #elif defined IS_AMD return rotr64 (a, n); @@ -973,7 +977,7 @@ DECLSPEC u64 hc_rotl64_S (const u64 a, const int n) { #if defined _CPU_OPENCL_EMU_H return rotl64 (a, n); - #elif defined IS_CUDA + #elif defined IS_CUDA || defined IS_HIP return rotl64_S (a, 
n); #elif defined IS_AMD return rotl64_S (a, n); @@ -990,7 +994,7 @@ DECLSPEC u64 hc_rotr64_S (const u64 a, const int n) { #if defined _CPU_OPENCL_EMU_H return rotr64 (a, n); - #elif defined IS_CUDA + #elif defined IS_CUDA || defined IS_HIP return rotr64_S (a, n); #elif defined IS_AMD return rotr64_S (a, n); diff --git a/OpenCL/inc_common.h b/OpenCL/inc_common.h index 6e39b2ca3..fb65e2095 100644 --- a/OpenCL/inc_common.h +++ b/OpenCL/inc_common.h @@ -26,7 +26,7 @@ * - P19: Type of the esalt_bufs structure with additional data, or void. */ -#ifdef IS_CUDA +#if defined IS_CUDA || defined IS_HIP #define KERN_ATTR(p2,p4,p5,p6,p19) \ MAYBE_UNUSED GLOBAL_AS pw_t *pws, \ MAYBE_UNUSED p2 const kernel_rule_t *g_rules_buf, \ @@ -109,7 +109,7 @@ * do not use rules or tmps, etc. */ -#ifdef IS_CUDA +#if defined IS_CUDA || defined IS_HIP #define KERN_ATTR_BASIC() KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *g_bfs_buf, void, void, void) #define KERN_ATTR_BITSLICE() KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bs_word_t *g_words_buf_s, void, void, void) #define KERN_ATTR_ESALT(e) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *g_bfs_buf, void, void, e) diff --git a/OpenCL/inc_platform.cl b/OpenCL/inc_platform.cl index 768de504a..9265143c6 100644 --- a/OpenCL/inc_platform.cl +++ b/OpenCL/inc_platform.cl @@ -2,6 +2,9 @@ * Author......: See docs/credits.txt * License.....: MIT */ +#ifdef IS_HIP +#include +#endif #include "inc_vendor.h" #include "inc_types.h" @@ -60,7 +63,7 @@ DECLSPEC u64 rotr64_S (const u64 a, const int n) #endif -#ifdef IS_CUDA +#if defined IS_CUDA || defined IS_HIP #if ATTACK_EXEC == 11 @@ -85,6 +88,7 @@ CONSTANT_VK u32 generic_constant[8192]; // 32k #endif + DECLSPEC u32 atomic_dec (u32 *p) { return atomicSub (p, 1); diff --git a/OpenCL/inc_platform.h b/OpenCL/inc_platform.h index fdcf50fc1..422b29f4f 100644 --- a/OpenCL/inc_platform.h +++ b/OpenCL/inc_platform.h @@ -13,7 +13,7 @@ DECLSPEC u64 rotl64_S (const u64 a, const int n); DECLSPEC u64 rotr64_S (const u64 a, const int 
n); #endif -#ifdef IS_CUDA +#if defined IS_CUDA || defined IS_HIP DECLSPEC u32 atomic_dec (u32 *p); DECLSPEC u32 atomic_inc (u32 *p); DECLSPEC u32 atomic_or (u32 *p, u32 val); @@ -30,7 +30,9 @@ DECLSPEC u64x rotr64 (const u64x a, const int n); DECLSPEC u64 rotl64_S (const u64 a, const int n); DECLSPEC u64 rotr64_S (const u64 a, const int n); -//#define rotate(a,n) (((a) << (n)) | ((a) >> (32 - (n)))) +#ifdef IS_HIP +#define rotate(a,n) (((a) << (n)) | ((a) >> (32 - (n)))) +#endif #define bitselect(a,b,c) ((a) ^ ((c) & ((b) ^ (a)))) #endif diff --git a/OpenCL/inc_types.h b/OpenCL/inc_types.h index 9f0664263..8b3d1e05c 100644 --- a/OpenCL/inc_types.h +++ b/OpenCL/inc_types.h @@ -6,14 +6,15 @@ #ifndef _INC_TYPES_H #define _INC_TYPES_H -#ifdef IS_CUDA +#if defined IS_CUDA || defined IS_HIP //https://docs.nvidia.com/cuda/nvrtc/index.html#integer-size typedef unsigned char uchar; typedef unsigned short ushort; typedef unsigned int uint; -typedef unsigned long long ulong; +typedef unsigned long long xulong; #endif + #ifdef KERNEL_STATIC typedef uchar u8; typedef ushort u16; @@ -58,7 +59,7 @@ typedef u64 u64x; #define make_u64x (u64) #else -#ifdef IS_CUDA +#if defined IS_CUDA || defined IS_HIP #if VECT_SIZE == 2 diff --git a/OpenCL/inc_vendor.h b/OpenCL/inc_vendor.h index 6ca2c5707..de2d23866 100644 --- a/OpenCL/inc_vendor.h +++ b/OpenCL/inc_vendor.h @@ -10,6 +10,8 @@ #define IS_NATIVE #elif defined __CUDACC__ #define IS_CUDA +#elif defined __HIPCC__ +#define IS_HIP #else #define IS_OPENCL #endif @@ -21,7 +23,7 @@ #define LOCAL_VK #define LOCAL_AS #define KERNEL_FQ -#elif defined IS_CUDA +#elif (defined IS_CUDA) || (defined IS_HIP) #define CONSTANT_VK __constant__ #define CONSTANT_AS #define GLOBAL_AS @@ -80,7 +82,9 @@ #define IS_MESA #define IS_GENERIC #elif VENDOR_ID == (1 << 5) -#define IS_NV +//#define IS_NV //TODO: FIX ME HIP +#define IS_POCL +#define IS_GENERIC #elif VENDOR_ID == (1 << 6) #define IS_POCL #define IS_GENERIC @@ -116,10 +120,14 @@ */ #if defined IS_AMD 
&& defined IS_GPU -#define DECLSPEC inline static +#define DECLSPEC inline static __device__ +#else +#ifdef IS_HIP +#define DECLSPEC __device__ #else #define DECLSPEC #endif +#endif /** * AMD specific @@ -137,7 +145,7 @@ // Whitelist some OpenCL specific functions // This could create more stable kernels on systems with bad OpenCL drivers -#ifdef IS_CUDA +#if defined IS_CUDA || defined IS_HIP #define USE_BITSELECT #define USE_ROTATE #endif diff --git a/OpenCL/m01700_a0-optimized.cl b/OpenCL/m01700_a0-optimized.cl index 0d6ddb337..fc46cc9a9 100644 --- a/OpenCL/m01700_a0-optimized.cl +++ b/OpenCL/m01700_a0-optimized.cl @@ -86,7 +86,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #ifdef IS_CUDA +#if defined IS_CUDA || defined IS_HIP ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m01700_a1-optimized.cl b/OpenCL/m01700_a1-optimized.cl index abee4dfcb..2a0b4f6e8 100644 --- a/OpenCL/m01700_a1-optimized.cl +++ b/OpenCL/m01700_a1-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m01700_a3-optimized.cl b/OpenCL/m01700_a3-optimized.cl index c4d8ee016..fc5ec06f9 100644 --- a/OpenCL/m01700_a3-optimized.cl +++ b/OpenCL/m01700_a3-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m01710_a0-optimized.cl b/OpenCL/m01710_a0-optimized.cl index a5a53e831..8a14e3104 100644 --- a/OpenCL/m01710_a0-optimized.cl +++ b/OpenCL/m01710_a0-optimized.cl @@ -86,7 
+86,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m01710_a1-optimized.cl b/OpenCL/m01710_a1-optimized.cl index 931142cae..ac19e3fde 100644 --- a/OpenCL/m01710_a1-optimized.cl +++ b/OpenCL/m01710_a1-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m01710_a3-optimized.cl b/OpenCL/m01710_a3-optimized.cl index a82f949ac..83d4afc87 100644 --- a/OpenCL/m01710_a3-optimized.cl +++ b/OpenCL/m01710_a3-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m01720_a0-optimized.cl b/OpenCL/m01720_a0-optimized.cl index c331365f1..d40e66975 100644 --- a/OpenCL/m01720_a0-optimized.cl +++ b/OpenCL/m01720_a0-optimized.cl @@ -86,7 +86,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m01720_a1-optimized.cl b/OpenCL/m01720_a1-optimized.cl index aa93dc2c9..9a9c319f2 100644 --- a/OpenCL/m01720_a1-optimized.cl +++ b/OpenCL/m01720_a1-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP ROUND_EXPAND (); 
ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m01720_a3-optimized.cl b/OpenCL/m01720_a3-optimized.cl index 891634dd4..a4cbfb4eb 100644 --- a/OpenCL/m01720_a3-optimized.cl +++ b/OpenCL/m01720_a3-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m01730_a0-optimized.cl b/OpenCL/m01730_a0-optimized.cl index f5da15e7f..eef27b4f2 100644 --- a/OpenCL/m01730_a0-optimized.cl +++ b/OpenCL/m01730_a0-optimized.cl @@ -86,7 +86,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m01730_a1-optimized.cl b/OpenCL/m01730_a1-optimized.cl index f3cd8d89a..e86df4229 100644 --- a/OpenCL/m01730_a1-optimized.cl +++ b/OpenCL/m01730_a1-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m01730_a3-optimized.cl b/OpenCL/m01730_a3-optimized.cl index e00e5f4ae..c83e76a64 100644 --- a/OpenCL/m01730_a3-optimized.cl +++ b/OpenCL/m01730_a3-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m01740_a0-optimized.cl b/OpenCL/m01740_a0-optimized.cl index 
ee38662e8..f877c2075 100644 --- a/OpenCL/m01740_a0-optimized.cl +++ b/OpenCL/m01740_a0-optimized.cl @@ -86,7 +86,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m01740_a1-optimized.cl b/OpenCL/m01740_a1-optimized.cl index 0ae6984e0..3d400425b 100644 --- a/OpenCL/m01740_a1-optimized.cl +++ b/OpenCL/m01740_a1-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m01740_a3-optimized.cl b/OpenCL/m01740_a3-optimized.cl index 4b7b1d3df..ac56cb697 100644 --- a/OpenCL/m01740_a3-optimized.cl +++ b/OpenCL/m01740_a3-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m02500-pure.cl b/OpenCL/m02500-pure.cl index 95f97fb81..4b2459f78 100644 --- a/OpenCL/m02500-pure.cl +++ b/OpenCL/m02500-pure.cl @@ -681,7 +681,7 @@ KERNEL_FQ void m02500_aux3 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_eapol_t) s_te4[i] = te4[i]; } - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP __syncthreads(); #else SYNC_THREADS (); diff --git a/OpenCL/m08000_a0-optimized.cl b/OpenCL/m08000_a0-optimized.cl index 310bebbeb..dabd57d3d 100644 --- a/OpenCL/m08000_a0-optimized.cl +++ b/OpenCL/m08000_a0-optimized.cl @@ -86,7 +86,7 @@ DECLSPEC void sha256_transform_m (u32x *digest, const u32x *w) ROUND_STEP (0); - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP ROUND_EXPAND 
(); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); @@ -143,7 +143,7 @@ DECLSPEC void sha256_transform_z (u32x *digest) ROUND_STEP_Z (0); - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP ROUND_STEP_Z (16); ROUND_STEP_Z (32); ROUND_STEP_Z (48); diff --git a/OpenCL/m08000_a1-optimized.cl b/OpenCL/m08000_a1-optimized.cl index 89ea42a57..b7a42e88e 100644 --- a/OpenCL/m08000_a1-optimized.cl +++ b/OpenCL/m08000_a1-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha256_transform_m (u32x *digest, const u32x *w) ROUND_STEP (0); - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); @@ -141,7 +141,7 @@ DECLSPEC void sha256_transform_z (u32x *digest) ROUND_STEP_Z (0); - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP ROUND_STEP_Z (16); ROUND_STEP_Z (32); ROUND_STEP_Z (48); diff --git a/OpenCL/m08000_a3-optimized.cl b/OpenCL/m08000_a3-optimized.cl index fa76a3b72..77bb3225d 100644 --- a/OpenCL/m08000_a3-optimized.cl +++ b/OpenCL/m08000_a3-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha256_transform_m (u32x *digest, const u32x *w) ROUND_STEP (0); - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); @@ -141,7 +141,7 @@ DECLSPEC void sha256_transform_z (u32x *digest) ROUND_STEP_Z (0); - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP ROUND_STEP_Z (16); ROUND_STEP_Z (32); ROUND_STEP_Z (48); diff --git a/OpenCL/m08900-pure.cl b/OpenCL/m08900-pure.cl index 706f7f2e4..f5e607534 100644 --- a/OpenCL/m08900-pure.cl +++ b/OpenCL/m08900-pure.cl @@ -24,7 +24,7 @@ typedef struct } scrypt_tmp_t; -#ifdef IS_CUDA +#if defined IS_CUDA || defined IS_HIP inline __device__ uint4 operator & (const uint4 a, const u32 b) { return make_uint4 ((a.x & b ), (a.y & b ), (a.z & b ), (a.w & b )); } inline __device__ uint4 operator << (const 
uint4 a, const u32 b) { return make_uint4 ((a.x << b ), (a.y << b ), (a.z << b ), (a.w << b )); } @@ -57,7 +57,7 @@ DECLSPEC uint4 hc_swap32_4 (uint4 v) #define ADD_ROTATE_XOR(r,i1,i2,s) (r) ^= rotate ((i1) + (i2), (s)); -#ifdef IS_CUDA +#if defined IS_CUDA || defined IS_HIP #define SALSA20_2R() \ { \ @@ -205,7 +205,7 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui #endif for (u32 i = 0; i < STATE_CNT4; i += 4) { - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP T[0] = make_uint4 (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w); T[1] = make_uint4 (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w); T[2] = make_uint4 (X[i + 2].x, X[i + 3].y, X[i + 0].z, X[i + 1].w); @@ -252,7 +252,7 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui #endif for (u32 i = 0; i < STATE_CNT4; i += 4) { - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP T[0] = make_uint4 (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w); T[1] = make_uint4 (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w); T[2] = make_uint4 (X[i + 2].x, X[i + 1].y, X[i + 0].z, X[i + 3].w); @@ -328,7 +328,7 @@ KERNEL_FQ void m08900_init (KERN_ATTR_TMPS (scrypt_tmp_t)) digest[6] = sha256_hmac_ctx2.opad.h[6]; digest[7] = sha256_hmac_ctx2.opad.h[7]; - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP const uint4 tmp0 = make_uint4 (digest[0], digest[1], digest[2], digest[3]); const uint4 tmp1 = make_uint4 (digest[4], digest[5], digest[6], digest[7]); #else diff --git a/OpenCL/m10800_a0-optimized.cl b/OpenCL/m10800_a0-optimized.cl index 396b389a6..4f350a2c7 100644 --- a/OpenCL/m10800_a0-optimized.cl +++ b/OpenCL/m10800_a0-optimized.cl @@ -86,7 +86,7 @@ DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m10800_a1-optimized.cl 
b/OpenCL/m10800_a1-optimized.cl index 11aa95dbd..8e985263a 100644 --- a/OpenCL/m10800_a1-optimized.cl +++ b/OpenCL/m10800_a1-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m10800_a3-optimized.cl b/OpenCL/m10800_a3-optimized.cl index cef22d51f..a548aad58 100644 --- a/OpenCL/m10800_a3-optimized.cl +++ b/OpenCL/m10800_a3-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m15700-pure.cl b/OpenCL/m15700-pure.cl index 3c54850a4..13c8724c7 100644 --- a/OpenCL/m15700-pure.cl +++ b/OpenCL/m15700-pure.cl @@ -24,7 +24,7 @@ typedef struct } scrypt_tmp_t; -#ifdef IS_CUDA +#if defined IS_CUDA || defined IS_HIP inline __device__ uint4 operator & (const uint4 a, const u32 b) { return make_uint4 ((a.x & b ), (a.y & b ), (a.z & b ), (a.w & b )); } inline __device__ uint4 operator << (const uint4 a, const u32 b) { return make_uint4 ((a.x << b ), (a.y << b ), (a.z << b ), (a.w << b )); } @@ -64,7 +64,7 @@ DECLSPEC uint4 hc_swap32_4 (uint4 v) #define ADD_ROTATE_XOR(r,i1,i2,s) (r) ^= rotate ((i1) + (i2), (s)); -#ifdef IS_CUDA +#if defined IS_CUDA || defined IS_HIP #define SALSA20_2R() \ { \ @@ -212,7 +212,7 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui #endif for (u32 i = 0; i < STATE_CNT4; i += 4) { - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP T[0] = make_uint4 (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w); T[1] = make_uint4 (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w); T[2] = make_uint4 (X[i + 2].x, X[i + 3].y, X[i + 0].z, 
X[i + 1].w); @@ -259,7 +259,7 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui #endif for (u32 i = 0; i < STATE_CNT4; i += 4) { - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP T[0] = make_uint4 (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w); T[1] = make_uint4 (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w); T[2] = make_uint4 (X[i + 2].x, X[i + 1].y, X[i + 0].z, X[i + 3].w); @@ -464,7 +464,7 @@ KERNEL_FQ void m15700_init (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_ digest[6] = sha256_hmac_ctx2.opad.h[6]; digest[7] = sha256_hmac_ctx2.opad.h[7]; - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP const uint4 tmp0 = make_uint4 (digest[0], digest[1], digest[2], digest[3]); const uint4 tmp1 = make_uint4 (digest[4], digest[5], digest[6], digest[7]); #else diff --git a/OpenCL/m21000_a0-optimized.cl b/OpenCL/m21000_a0-optimized.cl index c7cfa5b8d..36ad9972f 100644 --- a/OpenCL/m21000_a0-optimized.cl +++ b/OpenCL/m21000_a0-optimized.cl @@ -89,7 +89,7 @@ DECLSPEC void sha512_transform_opt (const u32x *w0, const u32x *w1, const u32x * ROUND_STEP (0); - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m21000_a1-optimized.cl b/OpenCL/m21000_a1-optimized.cl index 7ff4577f7..f2beb1629 100644 --- a/OpenCL/m21000_a1-optimized.cl +++ b/OpenCL/m21000_a1-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_full (const u32x *w0, const u32x *w1, const u32x ROUND_STEP (0); - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); @@ -182,7 +182,7 @@ DECLSPEC void sha512_transform_opt (const u32x *w0, const u32x *w1, const u32x * ROUND_STEP (0); - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP 
(48); diff --git a/OpenCL/m21000_a3-optimized.cl b/OpenCL/m21000_a3-optimized.cl index 768ba9e02..757a87c8a 100644 --- a/OpenCL/m21000_a3-optimized.cl +++ b/OpenCL/m21000_a3-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_full (const u32x *w0, const u32x *w1, const u32x ROUND_STEP (0); - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); @@ -182,7 +182,7 @@ DECLSPEC void sha512_transform_opt (const u32x *w0, const u32x *w1, const u32x * ROUND_STEP (0); - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m22000-pure.cl b/OpenCL/m22000-pure.cl index 954f62ce3..816a52458 100644 --- a/OpenCL/m22000-pure.cl +++ b/OpenCL/m22000-pure.cl @@ -703,7 +703,7 @@ KERNEL_FQ void m22000_aux3 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_t)) s_te4[i] = te4[i]; } - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP __syncthreads(); #else SYNC_THREADS (); diff --git a/OpenCL/m22001-pure.cl b/OpenCL/m22001-pure.cl index e3a9d23f9..20c962313 100644 --- a/OpenCL/m22001-pure.cl +++ b/OpenCL/m22001-pure.cl @@ -610,7 +610,7 @@ KERNEL_FQ void m22001_aux3 (KERN_ATTR_TMPS_ESALT (wpa_pmk_tmp_t, wpa_t)) s_te4[i] = te4[i]; } - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP __syncthreads(); #else SYNC_THREADS (); diff --git a/OpenCL/m22200_a0-optimized.cl b/OpenCL/m22200_a0-optimized.cl index 8c0e51b03..528222fe1 100644 --- a/OpenCL/m22200_a0-optimized.cl +++ b/OpenCL/m22200_a0-optimized.cl @@ -86,7 +86,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m22200_a1-optimized.cl b/OpenCL/m22200_a1-optimized.cl index 
39ca46c20..3fa91b5a8 100644 --- a/OpenCL/m22200_a1-optimized.cl +++ b/OpenCL/m22200_a1-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m22200_a3-optimized.cl b/OpenCL/m22200_a3-optimized.cl index c04f8c8c4..f620cca46 100644 --- a/OpenCL/m22200_a3-optimized.cl +++ b/OpenCL/m22200_a3-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m22700-pure.cl b/OpenCL/m22700-pure.cl index f5ec90de5..0f5b84a4c 100644 --- a/OpenCL/m22700-pure.cl +++ b/OpenCL/m22700-pure.cl @@ -72,7 +72,7 @@ DECLSPEC int is_valid_bitcoinj (const u32 *w) return 1; } -#ifdef IS_CUDA +#if defined IS_CUDA || defined IS_HIP inline __device__ uint4 operator & (const uint4 a, const u32 b) { return make_uint4 ((a.x & b ), (a.y & b ), (a.z & b ), (a.w & b )); } inline __device__ uint4 operator << (const uint4 a, const u32 b) { return make_uint4 ((a.x << b ), (a.y << b ), (a.z << b ), (a.w << b )); } @@ -105,7 +105,7 @@ DECLSPEC uint4 hc_swap32_4 (uint4 v) #define ADD_ROTATE_XOR(r,i1,i2,s) (r) ^= rotate ((i1) + (i2), (s)); -#ifdef IS_CUDA +#if defined IS_CUDA || defined IS_HIP #define SALSA20_2R() \ { \ @@ -253,7 +253,7 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui #endif for (u32 i = 0; i < STATE_CNT4; i += 4) { - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP T[0] = make_uint4 (X[i + 0].x, X[i + 1].y, X[i + 2].z, X[i + 3].w); T[1] = make_uint4 (X[i + 1].x, X[i + 2].y, X[i + 3].z, X[i + 0].w); T[2] = make_uint4 (X[i + 2].x, X[i + 3].y, X[i + 0].z, X[i + 
1].w); @@ -300,7 +300,7 @@ DECLSPEC void scrypt_smix (uint4 *X, uint4 *T, GLOBAL_AS uint4 *V0, GLOBAL_AS ui #endif for (u32 i = 0; i < STATE_CNT4; i += 4) { - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP T[0] = make_uint4 (X[i + 0].x, X[i + 3].y, X[i + 2].z, X[i + 1].w); T[1] = make_uint4 (X[i + 1].x, X[i + 0].y, X[i + 3].z, X[i + 2].w); T[2] = make_uint4 (X[i + 2].x, X[i + 1].y, X[i + 0].z, X[i + 3].w); @@ -416,7 +416,7 @@ KERNEL_FQ void m22700_init (KERN_ATTR_TMPS (scrypt_tmp_t)) digest[6] = sha256_hmac_ctx2.opad.h[6]; digest[7] = sha256_hmac_ctx2.opad.h[7]; - #ifdef IS_CUDA + #if defined IS_CUDA || defined IS_HIP const uint4 tmp0 = make_uint4 (digest[0], digest[1], digest[2], digest[3]); const uint4 tmp1 = make_uint4 (digest[4], digest[5], digest[6], digest[7]); #else diff --git a/include/backend.h b/include/backend.h index 920f015cf..5a3aa990c 100644 --- a/include/backend.h +++ b/include/backend.h @@ -28,6 +28,12 @@ void cuda_close (hashcat_ctx_t *hashcat_ctx); int nvrtc_init (hashcat_ctx_t *hashcat_ctx); void nvrtc_close (hashcat_ctx_t *hashcat_ctx); +int hip_init (hashcat_ctx_t *hashcat_ctx); +void hip_close (hashcat_ctx_t *hashcat_ctx); + +int hiprtc_init (hashcat_ctx_t *hashcat_ctx); +void hiprtc_close (hashcat_ctx_t *hashcat_ctx); + int ocl_init (hashcat_ctx_t *hashcat_ctx); void ocl_close (hashcat_ctx_t *hashcat_ctx); @@ -79,6 +85,56 @@ int hc_cuLinkAddData (hashcat_ctx_t *hashcat_ctx, CUlinkState state, int hc_cuLinkDestroy (hashcat_ctx_t *hashcat_ctx, CUlinkState state); int hc_cuLinkComplete (hashcat_ctx_t *hashcat_ctx, CUlinkState state, void **cubinOut, size_t *sizeOut); + +int hc_hiprtcCreateProgram (hashcat_ctx_t *hashcat_ctx, hiprtcProgram *prog, const char *src, const char *name, int numHeaders, const char * const *headers, const char * const *includeNames); +int hc_hiprtcDestroyProgram (hashcat_ctx_t *hashcat_ctx, hiprtcProgram *prog); +int hc_hiprtcCompileProgram (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, int numOptions, const 
char * const *options); +int hc_hiprtcGetProgramLogSize (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, size_t *logSizeRet); +int hc_hiprtcGetProgramLog (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, char *log); +int hc_hiprtcGetCodeSize (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, size_t *ptxSizeRet); +int hc_hiprtcGetCode (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, char *ptx); +int hc_hiprtcVersion (hashcat_ctx_t *hashcat_ctx, int *major, int *minor); + +int hc_hipCtxCreate (hashcat_ctx_t *hashcat_ctx, HIPcontext *pctx, unsigned int flags, HIPdevice dev); +int hc_hipCtxDestroy (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx); +int hc_hipCtxSetCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx); +int hc_hipCtxSetCacheConfig (hashcat_ctx_t *hashcat_ctx, HIPfunc_cache config); +int hc_hipCtxSynchronize (hashcat_ctx_t *hashcat_ctx); +int hc_hipDeviceGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, HIPdevice_attribute attrib, HIPdevice dev); +int hc_hipDeviceGetCount (hashcat_ctx_t *hashcat_ctx, int *count); +int hc_hipDeviceGet (hashcat_ctx_t *hashcat_ctx, HIPdevice *device, int ordinal); +int hc_hipDeviceGetName (hashcat_ctx_t *hashcat_ctx, char *name, int len, HIPdevice dev); +int hc_hipDeviceTotalMem (hashcat_ctx_t *hashcat_ctx, size_t *bytes, HIPdevice dev); +int hc_hipDriverGetVersion (hashcat_ctx_t *hashcat_ctx, int *driverVersion); +int hc_hipEventCreate (hashcat_ctx_t *hashcat_ctx, HIPevent *phEvent, unsigned int Flags); +int hc_hipEventDestroy (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent); +int hc_hipEventElapsedTime (hashcat_ctx_t *hashcat_ctx, float *pMilliseconds, HIPevent hStart, HIPevent hEnd); +int hc_hipEventQuery (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent); +int hc_hipEventRecord (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent, HIPstream hStream); +int hc_hipEventSynchronize (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent); +int hc_hipFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, HIPfunction_attribute attrib, HIPfunction hfunc); +int 
hc_hipFuncSetAttribute (hashcat_ctx_t *hashcat_ctx, HIPfunction hfunc, HIPfunction_attribute attrib, int value); +int hc_hipInit (hashcat_ctx_t *hashcat_ctx, unsigned int Flags); +int hc_hipLaunchKernel (hashcat_ctx_t *hashcat_ctx, HIPfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, HIPstream hStream, void **kernelParams, void **extra); +int hc_hipMemAlloc (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr *dptr, size_t bytesize); +int hc_hipMemcpyDtoD (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, HIPdeviceptr srcDevice, size_t ByteCount); +int hc_hipMemcpyDtoH (hashcat_ctx_t *hashcat_ctx, void *dstHost, HIPdeviceptr srcDevice, size_t ByteCount); +int hc_hipMemcpyHtoD (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, const void *srcHost, size_t ByteCount); +int hc_hipMemFree (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dptr); +int hc_hipModuleGetFunction (hashcat_ctx_t *hashcat_ctx, HIPfunction *hfunc, HIPmodule hmod, const char *name); +int hc_hipModuleLoadDataEx (hashcat_ctx_t *hashcat_ctx, HIPmodule *module, const void *image, unsigned int numOptions, HIPjit_option *options, void **optionValues); +int hc_hipModuleUnload (hashcat_ctx_t *hashcat_ctx, HIPmodule hmod); +int hc_hipStreamCreate (hashcat_ctx_t *hashcat_ctx, HIPstream *phStream, unsigned int Flags); +int hc_hipStreamDestroy (hashcat_ctx_t *hashcat_ctx, HIPstream hStream); +int hc_hipStreamSynchronize (hashcat_ctx_t *hashcat_ctx, HIPstream hStream); +int hc_hipCtxPushCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx); +int hc_hipCtxPopCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext *pctx); +int hc_hipLinkCreate (hashcat_ctx_t *hashcat_ctx, unsigned int numOptions, HIPjit_option *options, void **optionValues, HIPlinkState *stateOut); +int hc_hipLinkAddData (hashcat_ctx_t *hashcat_ctx, HIPlinkState state, HIPjitInputType type, void *data, size_t size, const char *name, 
unsigned int numOptions, HIPjit_option *options, void **optionValues); +int hc_hipLinkDestroy (hashcat_ctx_t *hashcat_ctx, HIPlinkState state); +int hc_hipLinkComplete (hashcat_ctx_t *hashcat_ctx, HIPlinkState state, void **hipbinOut, size_t *sizeOut); + + int hc_clBuildProgram (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, void (CL_CALLBACK *pfn_notify) (cl_program program, void *user_data), void *user_data); int hc_clCreateBuffer (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_mem_flags flags, size_t size, void *host_ptr, cl_mem *mem); int hc_clCreateCommandQueue (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_device_id device, cl_command_queue_properties properties, cl_command_queue *command_queue); @@ -122,6 +178,10 @@ int run_cuda_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *de int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u32 value, const u64 size); int run_cuda_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 size); +int run_hip_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 num); +int run_hip_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u32 value, const u64 size); +int run_hip_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 size); + int run_opencl_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num); int run_opencl_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u32 value, const u64 size); int run_opencl_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 size); diff --git a/include/ext_hip.h b/include/ext_hip.h new file mode 100644 index 
000000000..15840d671 --- /dev/null +++ b/include/ext_hip.h @@ -0,0 +1,1131 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#ifndef _EXT_HIP_H +#define _EXT_HIP_H + +/** + * TODO: FIX ME + */ + +#define __HIP_API_VERSION 4221131 + +/** + * HIP device pointer + * HIPdeviceptr is defined as an unsigned integer type whose size matches the size of a pointer on the target platform. + */ +#if __HIP_API_VERSION >= 3020 + +#if defined(_WIN64) || defined(__LP64__) +typedef unsigned long long HIPdeviceptr; +#else +typedef unsigned int HIPdeviceptr; +#endif + +#endif /* __HIP_API_VERSION >= 3020 */ + +typedef int HIPdevice; /**< HIP device */ +typedef struct HIPctx_st *HIPcontext; /**< HIP context */ +typedef struct HIPevent_st *HIPevent; /**< HIP event */ +typedef struct HIPfunc_st *HIPfunction; /**< HIP function */ +typedef struct HIPmod_st *HIPmodule; /**< HIP module */ +typedef struct HIPstream_st *HIPstream; /**< HIP stream */ +typedef struct HIPlinkState_st *HIPlinkState; + + +typedef enum hipError_enum { + /** + * The API call returned with no errors. In the case of query calls, this + * also means that the operation being queried is complete (see + * ::hipEventQuery() and ::hipStreamQuery()). + */ + HIP_SUCCESS = 0, + + /** + * This indicates that one or more of the parameters passed to the API call + * is not within an acceptable range of values. + */ + HIP_ERROR_INVALID_VALUE = 1, + + /** + * The API call failed because it was unable to allocate enough memory to + * perform the requested operation. + */ + HIP_ERROR_OUT_OF_MEMORY = 2, + + /** + * This indicates that the HIP driver has not been initialized with + * ::hipInit() or that initialization has failed. + */ + HIP_ERROR_NOT_INITIALIZED = 3, + + /** + * This indicates that the HIP driver is in the process of shutting down. + */ + HIP_ERROR_DEINITIALIZED = 4, + + /** + * This indicates profiler is not initialized for this run. 
This can + * happen when the application is running with external profiling tools + * like visual profiler. + */ + HIP_ERROR_PROFILER_DISABLED = 5, + + /** + * \deprecated + * This error return is deprecated as of HIP 5.0. It is no longer an error + * to attempt to enable/disable the profiling via ::hipProfilerStart or + * ::hipProfilerStop without initialization. + */ + HIP_ERROR_PROFILER_NOT_INITIALIZED = 6, + + /** + * \deprecated + * This error return is deprecated as of HIP 5.0. It is no longer an error + * to call hipProfilerStart() when profiling is already enabled. + */ + HIP_ERROR_PROFILER_ALREADY_STARTED = 7, + + /** + * \deprecated + * This error return is deprecated as of HIP 5.0. It is no longer an error + * to call hipProfilerStop() when profiling is already disabled. + */ + HIP_ERROR_PROFILER_ALREADY_STOPPED = 8, + + /** + * This indicates that no HIP-capable devices were detected by the installed + * HIP driver. + */ + HIP_ERROR_NO_DEVICE = 100, + + /** + * This indicates that the device ordinal supplied by the user does not + * correspond to a valid HIP device. + */ + HIP_ERROR_INVALID_DEVICE = 101, + + + /** + * This indicates that the device kernel image is invalid. This can also + * indicate an invalid HIP module. + */ + HIP_ERROR_INVALID_IMAGE = 200, + + /** + * This most frequently indicates that there is no context bound to the + * current thread. This can also be returned if the context passed to an + * API call is not a valid handle (such as a context that has had + * ::hipCtxDestroy() invoked on it). This can also be returned if a user + * mixes different API versions (i.e. 3010 context with 3020 API calls). + * See ::hipCtxGetApiVersion() for more details. + */ + HIP_ERROR_INVALID_CONTEXT = 201, + + /** + * This indicates that the context being supplied as a parameter to the + * API call was already the active context. + * \deprecated + * This error return is deprecated as of HIP 3.2. 
It is no longer an + * error to attempt to push the active context via ::hipCtxPushCurrent(). + */ + HIP_ERROR_CONTEXT_ALREADY_CURRENT = 202, + + /** + * This indicates that a map or register operation has failed. + */ + HIP_ERROR_MAP_FAILED = 205, + + /** + * This indicates that an unmap or unregister operation has failed. + */ + HIP_ERROR_UNMAP_FAILED = 206, + + /** + * This indicates that the specified array is currently mapped and thus + * cannot be destroyed. + */ + HIP_ERROR_ARRAY_IS_MAPPED = 207, + + /** + * This indicates that the resource is already mapped. + */ + HIP_ERROR_ALREADY_MAPPED = 208, + + /** + * This indicates that there is no kernel image available that is suitable + * for the device. This can occur when a user specifies code generation + * options for a particular HIP source file that do not include the + * corresponding device configuration. + */ + HIP_ERROR_NO_BINARY_FOR_GPU = 209, + + /** + * This indicates that a resource has already been acquired. + */ + HIP_ERROR_ALREADY_ACQUIRED = 210, + + /** + * This indicates that a resource is not mapped. + */ + HIP_ERROR_NOT_MAPPED = 211, + + /** + * This indicates that a mapped resource is not available for access as an + * array. + */ + HIP_ERROR_NOT_MAPPED_AS_ARRAY = 212, + + /** + * This indicates that a mapped resource is not available for access as a + * pointer. + */ + HIP_ERROR_NOT_MAPPED_AS_POINTER = 213, + + /** + * This indicates that an uncorrectable ECC error was detected during + * execution. + */ + HIP_ERROR_ECC_UNCORRECTABLE = 214, + + /** + * This indicates that the ::HIPlimit passed to the API call is not + * supported by the active device. + */ + HIP_ERROR_UNSUPPORTED_LIMIT = 215, + + /** + * This indicates that the ::HIPcontext passed to the API call can + * only be bound to a single CPU thread at a time but is already + * bound to a CPU thread. 
+ */ + HIP_ERROR_CONTEXT_ALREADY_IN_USE = 216, + + /** + * This indicates that peer access is not supported across the given + * devices. + */ + HIP_ERROR_PEER_ACCESS_UNSUPPORTED = 217, + + /** + * This indicates that a PTX JIT compilation failed. + */ + HIP_ERROR_INVALID_PTX = 218, + + /** + * This indicates an error with OpenGL or DirectX context. + */ + HIP_ERROR_INVALID_GRAPHICS_CONTEXT = 219, + + /** + * This indicates that an uncorrectable NVLink error was detected during the + * execution. + */ + HIP_ERROR_NVLINK_UNCORRECTABLE = 220, + + /** + * This indicates that the PTX JIT compiler library was not found. + */ + HIP_ERROR_JIT_COMPILER_NOT_FOUND = 221, + + /** + * This indicates that the device kernel source is invalid. + */ + HIP_ERROR_INVALID_SOURCE = 300, + + /** + * This indicates that the file specified was not found. + */ + HIP_ERROR_FILE_NOT_FOUND = 301, + + /** + * This indicates that a link to a shared object failed to resolve. + */ + HIP_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302, + + /** + * This indicates that initialization of a shared object failed. + */ + HIP_ERROR_SHARED_OBJECT_INIT_FAILED = 303, + + /** + * This indicates that an OS call failed. + */ + HIP_ERROR_OPERATING_SYSTEM = 304, + + /** + * This indicates that a resource handle passed to the API call was not + * valid. Resource handles are opaque types like ::HIPstream and ::HIPevent. + */ + HIP_ERROR_INVALID_HANDLE = 400, + + /** + * This indicates that a resource required by the API call is not in a + * valid state to perform the requested operation. + */ + HIP_ERROR_ILLEGAL_STATE = 401, + + /** + * This indicates that a named symbol was not found. Examples of symbols + * are global/constant variable names, texture names, and surface names. + */ + HIP_ERROR_NOT_FOUND = 500, + + /** + * This indicates that asynchronous operations issued previously have not + * completed yet. 
This result is not actually an error, but must be indicated + * differently than ::HIP_SUCCESS (which indicates completion). Calls that + * may return this value include ::hipEventQuery() and ::hipStreamQuery(). + */ + HIP_ERROR_NOT_READY = 600, + + /** + * While executing a kernel, the device encountered a + * load or store instruction on an invalid memory address. + * This leaves the process in an inconsistent state and any further HIP work + * will return the same error. To continue using HIP, the process must be terminated + * and relaunched. + */ + HIP_ERROR_ILLEGAL_ADDRESS = 700, + + /** + * This indicates that a launch did not occur because it did not have + * appropriate resources. This error usually indicates that the user has + * attempted to pass too many arguments to the device kernel, or the + * kernel launch specifies too many threads for the kernel's register + * count. Passing arguments of the wrong size (i.e. a 64-bit pointer + * when a 32-bit int is expected) is equivalent to passing too many + * arguments and can also result in this error. + */ + HIP_ERROR_LAUNCH_OUT_OF_RESOURCES = 701, + + /** + * This indicates that the device kernel took too long to execute. This can + * only occur if timeouts are enabled - see the device attribute + * ::HIP_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT for more information. + * This leaves the process in an inconsistent state and any further HIP work + * will return the same error. To continue using HIP, the process must be terminated + * and relaunched. + */ + HIP_ERROR_LAUNCH_TIMEOUT = 702, + + /** + * This error indicates a kernel launch that uses an incompatible texturing + * mode. + */ + HIP_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703, + + /** + * This error indicates that a call to ::hipCtxEnablePeerAccess() is + * trying to re-enable peer access to a context which has already + * had peer access to it enabled. 
+ */ + HIP_ERROR_PEER_ACCESS_ALREADY_ENABLED = 704, + + /** + * This error indicates that ::hipCtxDisablePeerAccess() is + * trying to disable peer access which has not been enabled yet + * via ::hipCtxEnablePeerAccess(). + */ + HIP_ERROR_PEER_ACCESS_NOT_ENABLED = 705, + + /** + * This error indicates that the primary context for the specified device + * has already been initialized. + */ + HIP_ERROR_PRIMARY_CONTEXT_ACTIVE = 708, + + /** + * This error indicates that the context current to the calling thread + * has been destroyed using ::hipCtxDestroy, or is a primary context which + * has not yet been initialized. + */ + HIP_ERROR_CONTEXT_IS_DESTROYED = 709, + + /** + * A device-side assert triggered during kernel execution. The context + * cannot be used anymore, and must be destroyed. All existing device + * memory allocations from this context are invalid and must be + * reconstructed if the program is to continue using HIP. + */ + HIP_ERROR_ASSERT = 710, + + /** + * This error indicates that the hardware resources required to enable + * peer access have been exhausted for one or more of the devices + * passed to ::hipCtxEnablePeerAccess(). + */ + HIP_ERROR_TOO_MANY_PEERS = 711, + + /** + * This error indicates that the memory range passed to ::hipMemHostRegister() + * has already been registered. + */ + HIP_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712, + + /** + * This error indicates that the pointer passed to ::hipMemHostUnregister() + * does not correspond to any currently registered memory region. + */ + HIP_ERROR_HOST_MEMORY_NOT_REGISTERED = 713, + + /** + * While executing a kernel, the device encountered a stack error. + * This can be due to stack corruption or exceeding the stack size limit. + * This leaves the process in an inconsistent state and any further HIP work + * will return the same error. To continue using HIP, the process must be terminated + * and relaunched. 
+ */ + HIP_ERROR_HARDWARE_STACK_ERROR = 714, + + /** + * While executing a kernel, the device encountered an illegal instruction. + * This leaves the process in an inconsistent state and any further HIP work + * will return the same error. To continue using HIP, the process must be terminated + * and relaunched. + */ + HIP_ERROR_ILLEGAL_INSTRUCTION = 715, + + /** + * While executing a kernel, the device encountered a load or store instruction + * on a memory address which is not aligned. + * This leaves the process in an inconsistent state and any further HIP work + * will return the same error. To continue using HIP, the process must be terminated + * and relaunched. + */ + HIP_ERROR_MISALIGNED_ADDRESS = 716, + + /** + * While executing a kernel, the device encountered an instruction + * which can only operate on memory locations in certain address spaces + * (global, shared, or local), but was supplied a memory address not + * belonging to an allowed address space. + * This leaves the process in an inconsistent state and any further HIP work + * will return the same error. To continue using HIP, the process must be terminated + * and relaunched. + */ + HIP_ERROR_INVALID_ADDRESS_SPACE = 717, + + /** + * While executing a kernel, the device program counter wrapped its address space. + * This leaves the process in an inconsistent state and any further HIP work + * will return the same error. To continue using HIP, the process must be terminated + * and relaunched. + */ + HIP_ERROR_INVALID_PC = 718, + + /** + * An exception occurred on the device while executing a kernel. Common + * causes include dereferencing an invalid device pointer and accessing + * out of bounds shared memory. Less common cases can be system specific - more + * information about these cases can be found in the system specific user guide. + * This leaves the process in an inconsistent state and any further HIP work + * will return the same error. 
To continue using HIP, the process must be terminated + * and relaunched. + */ + HIP_ERROR_LAUNCH_FAILED = 719, + + /** + * This error indicates that the number of blocks launched per grid for a kernel that was + * launched via either ::hipLaunchCooperativeKernel or ::hipLaunchCooperativeKernelMultiDevice + * exceeds the maximum number of blocks as allowed by ::hipOccupancyMaxActiveBlocksPerMultiprocessor + * or ::hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags times the number of multiprocessors + * as specified by the device attribute ::HIP_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT. + */ + HIP_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE = 720, + + /** + * This error indicates that the attempted operation is not permitted. + */ + HIP_ERROR_NOT_PERMITTED = 800, + + /** + * This error indicates that the attempted operation is not supported + * on the current system or device. + */ + HIP_ERROR_NOT_SUPPORTED = 801, + + /** + * This error indicates that the system is not yet ready to start any HIP + * work. To continue using HIP, verify the system configuration is in a + * valid state and all required driver daemons are actively running. + * More information about this error can be found in the system specific + * user guide. + */ + HIP_ERROR_SYSTEM_NOT_READY = 802, + + /** + * This error indicates that there is a mismatch between the versions of + * the display driver and the HIP driver. Refer to the compatibility documentation + * for supported versions. + */ + HIP_ERROR_SYSTEM_DRIVER_MISMATCH = 803, + + /** + * This error indicates that the system was upgraded to run with forward compatibility + * but the visible hardware detected by HIP does not support this configuration. + * Refer to the compatibility documentation for the supported hardware matrix or ensure + * that only supported hardware is visible during initialization via the HIP_VISIBLE_DEVICES + * environment variable. 
+ */ + HIP_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE = 804, + + /** + * This error indicates that the operation is not permitted when + * the stream is capturing. + */ + HIP_ERROR_STREAM_CAPTURE_UNSUPPORTED = 900, + + /** + * This error indicates that the current capture sequence on the stream + * has been invalidated due to a previous error. + */ + HIP_ERROR_STREAM_CAPTURE_INVALIDATED = 901, + + /** + * This error indicates that the operation would have resulted in a merge + * of two independent capture sequences. + */ + HIP_ERROR_STREAM_CAPTURE_MERGE = 902, + + /** + * This error indicates that the capture was not initiated in this stream. + */ + HIP_ERROR_STREAM_CAPTURE_UNMATCHED = 903, + + /** + * This error indicates that the capture sequence contains a fork that was + * not joined to the primary stream. + */ + HIP_ERROR_STREAM_CAPTURE_UNJOINED = 904, + + /** + * This error indicates that a dependency would have been created which + * crosses the capture sequence boundary. Only implicit in-stream ordering + * dependencies are allowed to cross the boundary. + */ + HIP_ERROR_STREAM_CAPTURE_ISOLATION = 905, + + /** + * This error indicates a disallowed implicit dependency on a current capture + * sequence from HIPStreamLegacy. + */ + HIP_ERROR_STREAM_CAPTURE_IMPLICIT = 906, + + /** + * This error indicates that the operation is not permitted on an event which + * was last recorded in a capturing stream. + */ + HIP_ERROR_CAPTURED_EVENT = 907, + + /** + * A stream capture sequence not initiated with the ::HIP_STREAM_CAPTURE_MODE_RELAXED + * argument to ::HIPStreamBeginCapture was passed to ::hipStreamEndCapture in a + * different thread. + */ + HIP_ERROR_STREAM_CAPTURE_WRONG_THREAD = 908, + + /** + * This indicates that an unknown internal error has occurred. 
+ */ + HIP_ERROR_UNKNOWN = 999 +} HIPresult; + +/** + * Online compiler and linker options + */ +typedef enum HIPjit_option_enum +{ + /** + * Max number of registers that a thread may use.\n + * Option type: unsigned int\n + * Applies to: compiler only + */ + HIP_JIT_MAX_REGISTERS = 0, + + /** + * IN: Specifies minimum number of threads per block to target compilation + * for\n + * OUT: Returns the number of threads the compiler actually targeted. + * This restricts the resource utilization of the compiler (e.g. max + * registers) such that a block with the given number of threads should be + * able to launch based on register limitations. Note, this option does not + * currently take into account any other resource limitations, such as + * shared memory utilization.\n + * Cannot be combined with ::HIP_JIT_TARGET.\n + * Option type: unsigned int\n + * Applies to: compiler only + */ + HIP_JIT_THREADS_PER_BLOCK, + + /** + * Overwrites the option value with the total wall clock time, in + * milliseconds, spent in the compiler and linker\n + * Option type: float\n + * Applies to: compiler and linker + */ + HIP_JIT_WALL_TIME, + + /** + * Pointer to a buffer in which to print any log messages + * that are informational in nature (the buffer size is specified via + * option ::HIP_JIT_INFO_LOG_BUFFER_SIZE_BYTES)\n + * Option type: char *\n + * Applies to: compiler and linker + */ + HIP_JIT_INFO_LOG_BUFFER, + + /** + * IN: Log buffer size in bytes. 
Log messages will be capped at this size + * (including null terminator)\n + * OUT: Amount of log buffer filled with messages\n + * Option type: unsigned int\n + * Applies to: compiler and linker + */ + HIP_JIT_INFO_LOG_BUFFER_SIZE_BYTES, + + /** + * Pointer to a buffer in which to print any log messages that + * reflect errors (the buffer size is specified via option + * ::HIP_JIT_ERROR_LOG_BUFFER_SIZE_BYTES)\n + * Option type: char *\n + * Applies to: compiler and linker + */ + HIP_JIT_ERROR_LOG_BUFFER, + + /** + * IN: Log buffer size in bytes. Log messages will be capped at this size + * (including null terminator)\n + * OUT: Amount of log buffer filled with messages\n + * Option type: unsigned int\n + * Applies to: compiler and linker + */ + HIP_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, + + /** + * Level of optimizations to apply to generated code (0 - 4), with 4 + * being the default and highest level of optimizations.\n + * Option type: unsigned int\n + * Applies to: compiler only + */ + HIP_JIT_OPTIMIZATION_LEVEL, + + /** + * No option value required. Determines the target based on the current + * attached context (default)\n + * Option type: No option value needed\n + * Applies to: compiler and linker + */ + HIP_JIT_TARGET_FROM_HIPCONTEXT, + + /** + * Target is chosen based on supplied ::HIPjit_target. Cannot be + * combined with ::HIP_JIT_THREADS_PER_BLOCK.\n + * Option type: unsigned int for enumerated type ::HIPjit_target\n + * Applies to: compiler and linker + */ + HIP_JIT_TARGET, + + /** + * Specifies choice of fallback strategy if matching HIPbin is not found. + * Choice is based on supplied ::HIPjit_fallback. 
This option cannot be + * used with HIPLink* APIs as the linker requires exact matches.\n + * Option type: unsigned int for enumerated type ::HIPjit_fallback\n + * Applies to: compiler only + */ + HIP_JIT_FALLBACK_STRATEGY, + + /** + * Specifies whether to create debug information in output (-g) + * (0: false, default)\n + * Option type: int\n + * Applies to: compiler and linker + */ + HIP_JIT_GENERATE_DEBUG_INFO, + + /** + * Generate verbose log messages (0: false, default)\n + * Option type: int\n + * Applies to: compiler and linker + */ + HIP_JIT_LOG_VERBOSE, + + /** + * Generate line number information (-lineinfo) (0: false, default)\n + * Option type: int\n + * Applies to: compiler only + */ + HIP_JIT_GENERATE_LINE_INFO, + + /** + * Specifies whether to enable caching explicitly (-dlcm) \n + * Choice is based on supplied ::HIPjit_cacheMode_enum.\n + * Option type: unsigned int for enumerated type ::HIPjit_cacheMode_enum\n + * Applies to: compiler only + */ + HIP_JIT_CACHE_MODE, + + /** + * The below jit options are used for internal purposes only, in this version of HIP + */ + HIP_JIT_NEW_SM3X_OPT, + HIP_JIT_FAST_COMPILE, + + /** + * Array of device symbol names that will be relocated to the corresponding + * host addresses stored in ::HIP_JIT_GLOBAL_SYMBOL_ADDRESSES.\n + * Must contain ::HIP_JIT_GLOBAL_SYMBOL_COUNT entries.\n + * When loading a device module, driver will relocate all encountered + * unresolved symbols to the host addresses.\n + * It is only allowed to register symbols that correspond to unresolved + * global variables.\n + * It is illegal to register the same device symbol at multiple addresses.\n + * Option type: const char **\n + * Applies to: dynamic linker only + */ + HIP_JIT_GLOBAL_SYMBOL_NAMES, + + /** + * Array of host addresses that will be used to relocate corresponding + * device symbols stored in ::HIP_JIT_GLOBAL_SYMBOL_NAMES.\n + * Must contain ::HIP_JIT_GLOBAL_SYMBOL_COUNT entries.\n + * Option type: void **\n + * Applies to: 
dynamic linker only + */ + HIP_JIT_GLOBAL_SYMBOL_ADDRESSES, + + /** + * Number of entries in ::HIP_JIT_GLOBAL_SYMBOL_NAMES and + * ::HIP_JIT_GLOBAL_SYMBOL_ADDRESSES arrays.\n + * Option type: unsigned int\n + * Applies to: dynamic linker only + */ + HIP_JIT_GLOBAL_SYMBOL_COUNT, + + HIP_JIT_NUM_OPTIONS + +} HIPjit_option; + + +/** + * Device properties + */ +typedef enum HIPdevice_attribute_enum { + + HIP_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0, /**< Maximum number of threads per block */ + HIP_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 1, /**< Maximum block dimension X */ + HIP_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 2, /**< Maximum block dimension Y */ + HIP_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 3, /**< Maximum block dimension Z */ + HIP_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 4, /**< Maximum grid dimension X */ + HIP_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 5, /**< Maximum grid dimension Y */ + HIP_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 6, /**< Maximum grid dimension Z */ + HIP_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 7, /**< Maximum shared memory available per block in bytes */ + HIP_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 7, /**< Deprecated, use HIP_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK */ + HIP_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 7, /**< Maximum optin shared memory per block. NOTE(review): same value (7) as HIP_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, so both names select the same attribute - confirm this aliasing is intended */ + HIP_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 8, /**< Memory available on device for __constant__ variables in a HIP C kernel in bytes */ + HIP_DEVICE_ATTRIBUTE_WARP_SIZE = 9, /**< Warp size in threads */ + HIP_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 10, /**< Maximum number of 32-bit registers available per block */ + HIP_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 10, /**< Deprecated, use HIP_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK */ + HIP_DEVICE_ATTRIBUTE_CLOCK_RATE = 11, /**< Typical clock frequency in kilohertz */ + HIP_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 12, /**< Peak memory clock frequency in kilohertz */ + HIP_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 13, /**< Global memory 
bus width in bits */ + HIP_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 14, /**< Number of multiprocessors on device */ + HIP_DEVICE_ATTRIBUTE_COMPUTE_MODE = 15, /**< Compute mode (See ::HIPcomputemode for details) */ + HIP_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 16, /**< Size of L2 cache in bytes */ + HIP_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 17, /**< Maximum resident threads per multiprocessor */ + HIP_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 18, /**< Major compute capability version number */ + HIP_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 19, /**< Minor compute capability version number */ + HIP_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 20, /**< Device can possibly execute multiple kernels concurrently */ + HIP_DEVICE_ATTRIBUTE_PCI_BUS_ID = 21, /**< PCI bus ID of the device */ + HIP_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 22, /**< PCI device ID of the device */ + HIP_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 22, /**< PCI domain ID of the device. NOTE(review): same value (22) as HIP_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, so a query by this name selects the same attribute as the device ID - confirm this aliasing is intended */ + HIP_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 23, /**< Maximum shared memory available per multiprocessor in bytes */ + HIP_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 24, /**< Device is on a multi-GPU board */ + HIP_DEVICE_ATTRIBUTE_INTEGRATED = 25, /**< Device is integrated with host memory */ + HIP_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = 26, /**< Device supports launching cooperative kernels via ::hipLaunchCooperativeKernel */ + HIP_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = 27, /**< Device can participate in cooperative kernels launched via ::hipLaunchCooperativeKernelMultiDevice */ + HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 28, /**< Maximum 1D texture width */ + HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 29, /**< Maximum 2D texture width */ + HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 30, /**< Maximum 2D texture height */ + HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 31, /**< Maximum 3D texture width */ + HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 32, /**< Maximum 3D texture height */ + 
HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 33, /**< Maximum 3D texture depth */ + + HIP_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 37, /**< Alignment requirement for textures */ + HIP_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 38, /**< Pitch alignment requirement for textures */ + HIP_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 39, /**< Specifies whether there is a run time limit on kernels */ + HIP_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 40, /**< Device can map host memory into HIP address space */ + HIP_DEVICE_ATTRIBUTE_ECC_ENABLED = 41, /**< Device has ECC support enabled */ + + HIP_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 47, /**< Device can allocate managed memory on this system */ + HIP_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = 48, /**< The host can directly access managed memory on the device without migration. */ + HIP_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 49, /**< Device can coherently access managed memory concurrently with the CPU */ + HIP_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 50, /**< Device supports coherently accessing pageable memory without calling HIPHostRegister on it */ + HIP_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 51, /**< Device accesses pageable memory via the host's page tables. */ + HIP_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR = 52, /**< ::HIP_STREAM_WAIT_VALUE_NOR is supported. */ + + + // HIP_DEVICE_ATTRIBUTE_MAX_PITCH = , /**< Maximum pitch in bytes allowed by memory copies */ + // HIP_DEVICE_ATTRIBUTE_GPU_OVERLAP = , /**< Device can possibly copy memory and execute a kernel concurrently. Deprecated. Use instead HIP_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT. 
*/ + // + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = , /**< Maximum 2D layered texture width */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = , /**< Maximum 2D layered texture height */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = , /**< Maximum layers in a 2D layered texture */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = , /**< Deprecated, use HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = , /**< Deprecated, use HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = , /**< Deprecated, use HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS */ + // HIP_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT =, /**< Alignment requirement for surfaces */ + // HIP_DEVICE_ATTRIBUTE_TCC_DRIVER = , /**< Device is using TCC driver model */ + // HIP_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = , /**< Number of asynchronous engines */ + // HIP_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = , /**< Device shares a unified address space with the host */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = , /**< Maximum 1D layered texture width */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = , /**< Maximum layers in a 1D layered texture */ + // HIP_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = , /**< Deprecated, do not use. 
*/ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = , /**< Maximum 2D texture width if HIP_ARRAY3D_TEXTURE_GATHER is set */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = , /**< Maximum 2D texture height if HIP_ARRAY3D_TEXTURE_GATHER is set */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = , /**< Alternate maximum 3D texture width */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = ,/**< Alternate maximum 3D texture height */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = , /**< Alternate maximum 3D texture depth */ + // + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = , /**< Maximum cubemap texture width/height */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = , /**< Maximum cubemap layered texture width/height */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = , /**< Maximum layers in a cubemap layered texture */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = , /**< Maximum 1D surface width */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = , /**< Maximum 2D surface width */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = , /**< Maximum 2D surface height */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = , /**< Maximum 3D surface width */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = , /**< Maximum 3D surface height */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = , /**< Maximum 3D surface depth */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = , /**< Maximum 1D layered surface width */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = , /**< Maximum layers in a 1D layered surface */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = , /**< Maximum 2D layered surface width */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = , /**< Maximum 2D layered surface height */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = , /**< Maximum layers in a 2D layered surface */ + // 
HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = , /**< Maximum cubemap surface width */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = , /**< Maximum cubemap layered surface width */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = , /**< Maximum layers in a cubemap layered surface */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = , /**< Maximum 1D linear texture width */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = , /**< Maximum 2D linear texture width */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = , /**< Maximum 2D linear texture height */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = , /**< Maximum 2D linear texture pitch in bytes */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = , /**< Maximum mipmapped 2D texture width */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = ,/**< Maximum mipmapped 2D texture height */ + // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = , /**< Maximum mipmapped 1D texture width */ + // HIP_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = , /**< Device supports stream priorities */ + // HIP_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = , /**< Device supports caching globals in L1 */ + // HIP_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = , /**< Device supports caching locals in L1 */ + // HIP_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = , /**< Maximum number of 32-bit registers available per multiprocessor */ + // HIP_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = , /**< Unique id for a group of devices on the same multi-GPU board */ + // HIP_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = , /**< Link between the device and the host supports native atomic operations (this is a placeholder attribute, and is not supported on any current hardware)*/ + // HIP_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = , /**< Ratio of single precision performance (in floating-point operations per second) to 
double precision performance */ + // HIP_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = , /**< Device supports compute preemption. */ + // HIP_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = , /**< Device can access host registered memory at the same virtual address as the CPU */ + // HIP_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS = , /**< ::hipStreamBatchMemOp and related APIs are supported. */ + // HIP_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS = , /**< 64-bit operations are supported in ::hipStreamBatchMemOp and related APIs. */ + // HIP_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES = , /**< Both the ::HIP_STREAM_WAIT_VALUE_FLUSH flag and the ::HIP_STREAM_MEM_OP_FLUSH_REMOTE_WRITES MemOp are supported on the device. See \ref HIP_MEMOP for additional details. */ + // HIP_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED = , /**< Device supports host memory registration via ::HIPHostRegister. */ + // HIP_DEVICE_ATTRIBUTE_MAX +} HIPdevice_attribute; + +/** + * Function cache configurations + */ +typedef enum HIPfunc_cache_enum { + HIP_FUNC_CACHE_PREFER_NONE = 0x00, /**< no preference for shared memory or L1 (default) */ + HIP_FUNC_CACHE_PREFER_SHARED = 0x01, /**< prefer larger shared memory and smaller L1 cache */ + HIP_FUNC_CACHE_PREFER_L1 = 0x02, /**< prefer larger L1 cache and smaller shared memory */ + HIP_FUNC_CACHE_PREFER_EQUAL = 0x03 /**< prefer equal sized L1 cache and shared memory */ +} HIPfunc_cache; + +/** + * Shared memory configurations + */ +typedef enum HIPsharedconfig_enum { + HIP_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE = 0x00, /**< set default shared memory bank size */ + HIP_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE = 0x01, /**< set shared memory bank width to four bytes */ + HIP_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE = 0x02 /**< set shared memory bank width to eight bytes */ +} HIPsharedconfig; + +/** + * Function properties + */ +typedef enum HIPfunction_attribute_enum { + /** + * The maximum number of threads per block, beyond which a launch of the + * 
function would fail. This number depends on both the function and the + * device on which the function is currently loaded. + */ + HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0, + + /** + * The size in bytes of statically-allocated shared memory required by + * this function. This does not include dynamically-allocated shared + * memory requested by the user at runtime. + */ + HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1, + + /** + * The size in bytes of user-allocated constant memory required by this + * function. + */ + HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2, + + /** + * The size in bytes of local memory used by each thread of this function. + */ + HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3, + + /** + * The number of registers used by each thread of this function. + */ + HIP_FUNC_ATTRIBUTE_NUM_REGS = 4, + + /** + * The PTX virtual architecture version for which the function was + * compiled. This value is the major PTX version * 10 + the minor PTX + * version, so a PTX version 1.3 function would return the value 13. + * Note that this may return the undefined value of 0 for cubins + * compiled prior to HIP 3.0. + */ + HIP_FUNC_ATTRIBUTE_PTX_VERSION = 5, + + /** + * The binary architecture version for which the function was compiled. + * This value is the major binary version * 10 + the minor binary version, + * so a binary version 1.3 function would return the value 13. Note that + * this will return a value of 10 for legacy cubins that do not have a + * properly-encoded binary architecture version. + */ + HIP_FUNC_ATTRIBUTE_BINARY_VERSION = 6, + + /** + * The attribute to indicate whether the function has been compiled with + * user specified option "-Xptxas --dlcm=ca" set . + */ + HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA = 7, + + /** + * The maximum size in bytes of dynamically-allocated shared memory that can be used by + * this function. If the user-specified dynamic shared memory size is larger than this + * value, the launch will fail. 
+ * See ::hipFuncSetAttribute + */ + HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES = 8, + + /** + * On devices where the L1 cache and shared memory use the same hardware resources, + * this sets the shared memory carveout preference, in percent of the total shared memory. + * Refer to ::HIP_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR. + * This is only a hint, and the driver can choose a different ratio if required to execute the function. + * See ::hipFuncSetAttribute + */ + HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 9, + + HIP_FUNC_ATTRIBUTE_MAX +} HIPfunction_attribute; + +/** + * Context creation flags + */ +typedef enum HIPctx_flags_enum { + HIP_CTX_SCHED_AUTO = 0x00, /**< Automatic scheduling */ + HIP_CTX_SCHED_SPIN = 0x01, /**< Set spin as default scheduling */ + HIP_CTX_SCHED_YIELD = 0x02, /**< Set yield as default scheduling */ + HIP_CTX_SCHED_BLOCKING_SYNC = 0x04, /**< Set blocking synchronization as default scheduling */ + HIP_CTX_BLOCKING_SYNC = 0x04, /**< Set blocking synchronization as default scheduling + * \deprecated This flag was deprecated as of HIP 4.0 + * and was replaced with ::HIP_CTX_SCHED_BLOCKING_SYNC. 
*/ + HIP_CTX_SCHED_MASK = 0x07, + HIP_CTX_MAP_HOST = 0x08, /**< Support mapped pinned allocations */ + HIP_CTX_LMEM_RESIZE_TO_MAX = 0x10, /**< Keep local memory allocation after launch */ + HIP_CTX_FLAGS_MASK = 0x1f +} HIPctx_flags; + +/** + * Stream creation flags + */ +typedef enum HIPstream_flags_enum { + HIP_STREAM_DEFAULT = 0x0, /**< Default stream flag */ + HIP_STREAM_NON_BLOCKING = 0x1 /**< Stream does not synchronize with stream 0 (the NULL stream) */ +} HIPstream_flags; + +/** + * Event creation flags + */ +typedef enum HIPevent_flags_enum { + HIP_EVENT_DEFAULT = 0x0, /**< Default event flag */ + HIP_EVENT_BLOCKING_SYNC = 0x1, /**< Event uses blocking synchronization */ + HIP_EVENT_DISABLE_TIMING = 0x2, /**< Event will not record timing data */ + HIP_EVENT_INTERPROCESS = 0x4 /**< Event is suitable for interprocess use. HIP_EVENT_DISABLE_TIMING must be set */ +} HIPevent_flags; + +typedef enum HIPjitInputType_enum +{ + /** + * Compiled device-class-specific device code\n + * Applicable options: none + */ + HIP_JIT_INPUT_HIPBIN = 0, + + /** + * PTX source code\n + * Applicable options: PTX compiler options + */ + HIP_JIT_INPUT_PTX, + + /** + * Bundle of multiple cubins and/or PTX of some device code\n + * Applicable options: PTX compiler options, ::HIP_JIT_FALLBACK_STRATEGY + */ + HIP_JIT_INPUT_FATBINARY, + + /** + * Host object with embedded device code\n + * Applicable options: PTX compiler options, ::HIP_JIT_FALLBACK_STRATEGY + */ + HIP_JIT_INPUT_OBJECT, + + /** + * Archive of host objects with embedded device code\n + * Applicable options: PTX compiler options, ::HIP_JIT_FALLBACK_STRATEGY + */ + HIP_JIT_INPUT_LIBRARY, + + HIP_JIT_NUM_INPUT_TYPES +} HIPjitInputType; + +#ifdef _WIN32 +#define HIPAPI __stdcall +#else +#define HIPAPI +#endif + +#define HIP_API_CALL HIPAPI + +typedef HIPresult (HIP_API_CALL *HIP_HIPCTXCREATE) (HIPcontext *, unsigned int, HIPdevice); +typedef HIPresult (HIP_API_CALL *HIP_HIPCTXDESTROY) (HIPcontext); +typedef HIPresult 
(HIP_API_CALL *HIP_HIPCTXGETCACHECONFIG) (HIPfunc_cache *); +typedef HIPresult (HIP_API_CALL *HIP_HIPCTXGETCURRENT) (HIPcontext *); +typedef HIPresult (HIP_API_CALL *HIP_HIPCTXGETSHAREDMEMCONFIG) (HIPsharedconfig *); +typedef HIPresult (HIP_API_CALL *HIP_HIPCTXPOPCURRENT) (HIPcontext *); +typedef HIPresult (HIP_API_CALL *HIP_HIPCTXPUSHCURRENT) (HIPcontext); +typedef HIPresult (HIP_API_CALL *HIP_HIPCTXSETCACHECONFIG) (HIPfunc_cache); +typedef HIPresult (HIP_API_CALL *HIP_HIPCTXSETCURRENT) (HIPcontext); +typedef HIPresult (HIP_API_CALL *HIP_HIPCTXSETSHAREDMEMCONFIG) (HIPsharedconfig); +typedef HIPresult (HIP_API_CALL *HIP_HIPCTXSYNCHRONIZE) (); +typedef HIPresult (HIP_API_CALL *HIP_HIPDEVICEGETATTRIBUTE) (int *, HIPdevice_attribute, HIPdevice); +typedef HIPresult (HIP_API_CALL *HIP_HIPDEVICEGETCOUNT) (int *); +typedef HIPresult (HIP_API_CALL *HIP_HIPDEVICEGET) (HIPdevice *, int); +typedef HIPresult (HIP_API_CALL *HIP_HIPDEVICEGETNAME) (char *, int, HIPdevice); +typedef HIPresult (HIP_API_CALL *HIP_HIPDEVICETOTALMEM) (size_t *, HIPdevice); +typedef HIPresult (HIP_API_CALL *HIP_HIPDRIVERGETVERSION) (int *); +typedef HIPresult (HIP_API_CALL *HIP_HIPEVENTCREATE) (HIPevent *, unsigned int); +typedef HIPresult (HIP_API_CALL *HIP_HIPEVENTDESTROY) (HIPevent); +typedef HIPresult (HIP_API_CALL *HIP_HIPEVENTELAPSEDTIME) (float *, HIPevent, HIPevent); +typedef HIPresult (HIP_API_CALL *HIP_HIPEVENTQUERY) (HIPevent); +typedef HIPresult (HIP_API_CALL *HIP_HIPEVENTRECORD) (HIPevent, HIPstream); +typedef HIPresult (HIP_API_CALL *HIP_HIPEVENTSYNCHRONIZE) (HIPevent); +typedef HIPresult (HIP_API_CALL *HIP_HIPFUNCGETATTRIBUTE) (int *, HIPfunction_attribute, HIPfunction); +typedef HIPresult (HIP_API_CALL *HIP_HIPFUNCSETATTRIBUTE) (HIPfunction, HIPfunction_attribute, int); +typedef HIPresult (HIP_API_CALL *HIP_HIPFUNCSETCACHECONFIG) (HIPfunction, HIPfunc_cache); +typedef HIPresult (HIP_API_CALL *HIP_HIPFUNCSETSHAREDMEMCONFIG) (HIPfunction, HIPsharedconfig); +typedef HIPresult (HIP_API_CALL 
*HIP_HIPGETERRORNAME) (HIPresult, const char **); +typedef HIPresult (HIP_API_CALL *HIP_HIPGETERRORSTRING) (HIPresult, const char **); +typedef HIPresult (HIP_API_CALL *HIP_HIPINIT) (unsigned int); +typedef HIPresult (HIP_API_CALL *HIP_HIPLAUNCHKERNEL) (HIPfunction, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, HIPstream, void **, void **); +typedef HIPresult (HIP_API_CALL *HIP_HIPMEMALLOC) (HIPdeviceptr *, size_t); +typedef HIPresult (HIP_API_CALL *HIP_HIPMEMALLOCHOST) (void **, size_t); +typedef HIPresult (HIP_API_CALL *HIP_HIPMEMCPYDTOD) (HIPdeviceptr, HIPdeviceptr, size_t); +typedef HIPresult (HIP_API_CALL *HIP_HIPMEMCPYDTOH) (void *, HIPdeviceptr, size_t); +typedef HIPresult (HIP_API_CALL *HIP_HIPMEMCPYHTOD) (HIPdeviceptr, const void *, size_t); +typedef HIPresult (HIP_API_CALL *HIP_HIPMEMFREE) (HIPdeviceptr); +typedef HIPresult (HIP_API_CALL *HIP_HIPMEMFREEHOST) (void *); +typedef HIPresult (HIP_API_CALL *HIP_HIPMEMGETINFO) (size_t *, size_t *); +typedef HIPresult (HIP_API_CALL *HIP_HIPMEMSETD32) (HIPdeviceptr, unsigned int, size_t); +typedef HIPresult (HIP_API_CALL *HIP_HIPMEMSETD8) (HIPdeviceptr, unsigned char, size_t); +typedef HIPresult (HIP_API_CALL *HIP_HIPMODULEGETFUNCTION) (HIPfunction *, HIPmodule, const char *); +typedef HIPresult (HIP_API_CALL *HIP_HIPMODULEGETGLOBAL) (HIPdeviceptr *, size_t *, HIPmodule, const char *); +typedef HIPresult (HIP_API_CALL *HIP_HIPMODULELOAD) (HIPmodule *, const char *); +typedef HIPresult (HIP_API_CALL *HIP_HIPMODULELOADDATA) (HIPmodule *, const void *); +typedef HIPresult (HIP_API_CALL *HIP_HIPMODULELOADDATAEX) (HIPmodule *, const void *, unsigned int, HIPjit_option *, void **); +typedef HIPresult (HIP_API_CALL *HIP_HIPMODULEUNLOAD) (HIPmodule); +typedef HIPresult (HIP_API_CALL *HIP_HIPPROFILERSTART) (); +typedef HIPresult (HIP_API_CALL *HIP_HIPPROFILERSTOP) (); +typedef HIPresult (HIP_API_CALL *HIP_HIPSTREAMCREATE) (HIPstream *, unsigned int); +typedef HIPresult 
(HIP_API_CALL *HIP_HIPSTREAMDESTROY) (HIPstream); +typedef HIPresult (HIP_API_CALL *HIP_HIPSTREAMSYNCHRONIZE) (HIPstream); +typedef HIPresult (HIP_API_CALL *HIP_HIPSTREAMWAITEVENT) (HIPstream, HIPevent, unsigned int); +typedef HIPresult (HIP_API_CALL *HIP_HIPLINKCREATE) (unsigned int, HIPjit_option *, void **, HIPlinkState *); +typedef HIPresult (HIP_API_CALL *HIP_HIPLINKADDDATA) (HIPlinkState, HIPjitInputType, void *, size_t, const char *, unsigned int, HIPjit_option *, void **); +typedef HIPresult (HIP_API_CALL *HIP_HIPLINKDESTROY) (HIPlinkState); +typedef HIPresult (HIP_API_CALL *HIP_HIPLINKCOMPLETE) (HIPlinkState, void **, size_t *); + +typedef struct hc_hip_lib +{ + hc_dynlib_t lib; + + HIP_HIPCTXCREATE hipCtxCreate; + HIP_HIPCTXDESTROY hipCtxDestroy; + HIP_HIPCTXGETCACHECONFIG hipCtxGetCacheConfig; + HIP_HIPCTXGETCURRENT hipCtxGetCurrent; + HIP_HIPCTXGETSHAREDMEMCONFIG hipCtxGetSharedMemConfig; + HIP_HIPCTXPOPCURRENT hipCtxPopCurrent; + HIP_HIPCTXPUSHCURRENT hipCtxPushCurrent; + HIP_HIPCTXSETCACHECONFIG hipCtxSetCacheConfig; + HIP_HIPCTXSETCURRENT hipCtxSetCurrent; + HIP_HIPCTXSETSHAREDMEMCONFIG hipCtxSetSharedMemConfig; + HIP_HIPCTXSYNCHRONIZE hipCtxSynchronize; + HIP_HIPDEVICEGETATTRIBUTE hipDeviceGetAttribute; + HIP_HIPDEVICEGETCOUNT hipDeviceGetCount; + HIP_HIPDEVICEGET hipDeviceGet; + HIP_HIPDEVICEGETNAME hipDeviceGetName; + HIP_HIPDEVICETOTALMEM hipDeviceTotalMem; + HIP_HIPDRIVERGETVERSION hipDriverGetVersion; + HIP_HIPEVENTCREATE hipEventCreate; + HIP_HIPEVENTDESTROY hipEventDestroy; + HIP_HIPEVENTELAPSEDTIME hipEventElapsedTime; + HIP_HIPEVENTQUERY hipEventQuery; + HIP_HIPEVENTRECORD hipEventRecord; + HIP_HIPEVENTSYNCHRONIZE hipEventSynchronize; + HIP_HIPFUNCGETATTRIBUTE hipFuncGetAttribute; + HIP_HIPFUNCSETATTRIBUTE hipFuncSetAttribute; + HIP_HIPFUNCSETCACHECONFIG hipFuncSetCacheConfig; + HIP_HIPFUNCSETSHAREDMEMCONFIG hipFuncSetSharedMemConfig; + HIP_HIPGETERRORNAME hipGetErrorName; + HIP_HIPGETERRORSTRING hipGetErrorString; + HIP_HIPINIT hipInit; + 
HIP_HIPLAUNCHKERNEL hipLaunchKernel; + HIP_HIPMEMALLOC hipMemAlloc; + HIP_HIPMEMALLOCHOST hipMemAllocHost; + HIP_HIPMEMCPYDTOD hipMemcpyDtoD; + HIP_HIPMEMCPYDTOH hipMemcpyDtoH; + HIP_HIPMEMCPYHTOD hipMemcpyHtoD; + HIP_HIPMEMFREE hipMemFree; + HIP_HIPMEMFREEHOST hipMemFreeHost; + HIP_HIPMEMGETINFO hipMemGetInfo; + HIP_HIPMEMSETD32 hipMemsetD32; + HIP_HIPMEMSETD8 hipMemsetD8; + HIP_HIPMODULEGETFUNCTION hipModuleGetFunction; + HIP_HIPMODULEGETGLOBAL hipModuleGetGlobal; + HIP_HIPMODULELOAD hipModuleLoad; + HIP_HIPMODULELOADDATA hipModuleLoadData; + HIP_HIPMODULELOADDATAEX hipModuleLoadDataEx; + HIP_HIPMODULEUNLOAD hipModuleUnload; + HIP_HIPPROFILERSTART hipProfilerStart; + HIP_HIPPROFILERSTOP hipProfilerStop; + HIP_HIPSTREAMCREATE hipStreamCreate; + HIP_HIPSTREAMDESTROY hipStreamDestroy; + HIP_HIPSTREAMSYNCHRONIZE hipStreamSynchronize; + HIP_HIPSTREAMWAITEVENT hipStreamWaitEvent; + HIP_HIPLINKCREATE hipLinkCreate; + HIP_HIPLINKADDDATA hipLinkAddData; + HIP_HIPLINKDESTROY hipLinkDestroy; + HIP_HIPLINKCOMPLETE hipLinkComplete; + +} hc_hip_lib_t; + +typedef hc_hip_lib_t HIP_PTR; + +#endif // _EXT_HIP_H \ No newline at end of file diff --git a/include/ext_hiprtc.h b/include/ext_hiprtc.h new file mode 100644 index 000000000..cd1be6c4b --- /dev/null +++ b/include/ext_hiprtc.h @@ -0,0 +1,87 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#ifndef _EXT_HIPRTC_H +#define _EXT_HIPRTC_H + +/** + * from hiprtc.h (/opt/rocm/hip/include/hip/amd_detail/hiprtc.h) + */ + +/** + * \ingroup error + * \brief The enumerated type hiprtcResult defines API call result codes. + * HIPRTC API functions return hiprtcResult to indicate the call + * result.
+ */ +typedef enum { + HIPRTC_SUCCESS = 0, + HIPRTC_ERROR_OUT_OF_MEMORY = 1, + HIPRTC_ERROR_PROGRAM_CREATION_FAILURE = 2, + HIPRTC_ERROR_INVALID_INPUT = 3, + HIPRTC_ERROR_INVALID_PROGRAM = 4, + HIPRTC_ERROR_INVALID_OPTION = 5, + HIPRTC_ERROR_COMPILATION = 6, + HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7, + HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8, + HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9, + HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10, + HIPRTC_ERROR_INTERNAL_ERROR = 11 +} hiprtcResult; + +/** + * \ingroup compilation + * \brief hiprtcProgram is the unit of compilation, and an opaque handle for + * a program. + * + * To compile a HIP program string, an instance of hiprtcProgram must be + * created first with ::hiprtcCreateProgram, then compiled with + * ::hiprtcCompileProgram. + */ +typedef struct _hiprtcProgram *hiprtcProgram; + +#ifdef _WIN32 +#define HIPRTCAPI __stdcall +#else +#define HIPRTCAPI +#endif + +#define HIPRTC_API_CALL HIPRTCAPI + +typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCADDNAMEEXPRESSION) (hiprtcProgram, const char * const); +typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCCOMPILEPROGRAM) (hiprtcProgram, int, const char * const *); +typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCCREATEPROGRAM) (hiprtcProgram *, const char *, const char *, int, const char * const *, const char * const *); +typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCDESTROYPROGRAM) (hiprtcProgram *); +typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCGETLOWEREDNAME) (hiprtcProgram, const char * const, const char **); +typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCGETPTX) (hiprtcProgram, char *); +typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCGETPTXSIZE) (hiprtcProgram, size_t *); +typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCGETPROGRAMLOG) (hiprtcProgram, char *); +typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCGETPROGRAMLOGSIZE) (hiprtcProgram, size_t *); +typedef const char * (HIPRTC_API_CALL
*HIPRTC_HIPRTCGETERRORSTRING) (hiprtcResult); +typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCVERSION) (int *, int *); + +typedef struct hc_hiprtc_lib +{ + hc_dynlib_t lib; + + HIPRTC_HIPRTCADDNAMEEXPRESSION hiprtcAddNameExpression; + HIPRTC_HIPRTCCOMPILEPROGRAM hiprtcCompileProgram; + HIPRTC_HIPRTCCREATEPROGRAM hiprtcCreateProgram; + HIPRTC_HIPRTCDESTROYPROGRAM hiprtcDestroyProgram; + HIPRTC_HIPRTCGETLOWEREDNAME hiprtcGetLoweredName; + HIPRTC_HIPRTCGETPTX hiprtcGetCode; + HIPRTC_HIPRTCGETPTXSIZE hiprtcGetCodeSize; + HIPRTC_HIPRTCGETPROGRAMLOG hiprtcGetProgramLog; + HIPRTC_HIPRTCGETPROGRAMLOGSIZE hiprtcGetProgramLogSize; + HIPRTC_HIPRTCGETERRORSTRING hiprtcGetErrorString; + HIPRTC_HIPRTCVERSION hiprtcVersion; + +} hc_hiprtc_lib_t; + +typedef hc_hiprtc_lib_t HIPRTC_PTR; + +int hiprtc_make_options_array_from_string (char *string, char **options); + +#endif // _EXT_HIPRTC_H diff --git a/include/types.h b/include/types.h index 05b427b9f..831af5a20 100644 --- a/include/types.h +++ b/include/types.h @@ -616,6 +616,7 @@ typedef enum user_options_defaults MARKOV_THRESHOLD = 0, NONCE_ERROR_CORRECTIONS = 8, BACKEND_IGNORE_CUDA = false, + BACKEND_IGNORE_HIP = false, BACKEND_IGNORE_OPENCL = false, BACKEND_INFO = false, BACKEND_VECTOR_WIDTH = 0, @@ -666,6 +667,7 @@ typedef enum user_options_map IDX_ATTACK_MODE = 'a', IDX_BACKEND_DEVICES = 'd', IDX_BACKEND_IGNORE_CUDA = 0xff01, + IDX_BACKEND_IGNORE_HIP = 0xff4d, IDX_BACKEND_IGNORE_OPENCL = 0xff02, IDX_BACKEND_INFO = 'I', IDX_BACKEND_VECTOR_WIDTH = 0xff03, @@ -1045,7 +1047,10 @@ typedef struct hc_fp } HCFILE; #include "ext_nvrtc.h" +#include "ext_hiprtc.h" + #include "ext_cuda.h" +#include "ext_hip.h" #include "ext_OpenCL.h" typedef struct hc_device_param @@ -1427,6 +1432,85 @@ typedef struct hc_device_param CUdeviceptr cuda_d_st_salts_buf; CUdeviceptr cuda_d_st_esalts_buf; + // API: hip + + bool is_hip; + + int hip_warp_size; + + HIPdevice hip_device; + HIPcontext hip_context; + HIPstream hip_stream; + + HIPevent 
hip_event1; + HIPevent hip_event2; + + HIPmodule hip_module; + HIPmodule hip_module_shared; + HIPmodule hip_module_mp; + HIPmodule hip_module_amp; + + HIPfunction hip_function1; + HIPfunction hip_function12; + HIPfunction hip_function2; + HIPfunction hip_function2e; + HIPfunction hip_function23; + HIPfunction hip_function3; + HIPfunction hip_function4; + HIPfunction hip_function_init2; + HIPfunction hip_function_loop2; + HIPfunction hip_function_mp; + HIPfunction hip_function_mp_l; + HIPfunction hip_function_mp_r; + HIPfunction hip_function_amp; + HIPfunction hip_function_tm; + HIPfunction hip_function_memset; + HIPfunction hip_function_atinit; + HIPfunction hip_function_decompress; + HIPfunction hip_function_aux1; + HIPfunction hip_function_aux2; + HIPfunction hip_function_aux3; + HIPfunction hip_function_aux4; + + HIPdeviceptr hip_d_pws_buf; + HIPdeviceptr hip_d_pws_amp_buf; + HIPdeviceptr hip_d_pws_comp_buf; + HIPdeviceptr hip_d_pws_idx; + HIPdeviceptr hip_d_words_buf_l; + HIPdeviceptr hip_d_words_buf_r; + HIPdeviceptr hip_d_rules; + HIPdeviceptr hip_d_rules_c; + HIPdeviceptr hip_d_combs; + HIPdeviceptr hip_d_combs_c; + HIPdeviceptr hip_d_bfs; + HIPdeviceptr hip_d_bfs_c; + HIPdeviceptr hip_d_tm_c; + HIPdeviceptr hip_d_bitmap_s1_a; + HIPdeviceptr hip_d_bitmap_s1_b; + HIPdeviceptr hip_d_bitmap_s1_c; + HIPdeviceptr hip_d_bitmap_s1_d; + HIPdeviceptr hip_d_bitmap_s2_a; + HIPdeviceptr hip_d_bitmap_s2_b; + HIPdeviceptr hip_d_bitmap_s2_c; + HIPdeviceptr hip_d_bitmap_s2_d; + HIPdeviceptr hip_d_plain_bufs; + HIPdeviceptr hip_d_digests_buf; + HIPdeviceptr hip_d_digests_shown; + HIPdeviceptr hip_d_salt_bufs; + HIPdeviceptr hip_d_esalt_bufs; + HIPdeviceptr hip_d_tmps; + HIPdeviceptr hip_d_hooks; + HIPdeviceptr hip_d_result; + HIPdeviceptr hip_d_extra0_buf; + HIPdeviceptr hip_d_extra1_buf; + HIPdeviceptr hip_d_extra2_buf; + HIPdeviceptr hip_d_extra3_buf; + HIPdeviceptr hip_d_root_css_buf; + HIPdeviceptr hip_d_markov_css_buf; + HIPdeviceptr hip_d_st_digests_buf; + HIPdeviceptr 
hip_d_st_salts_buf; + HIPdeviceptr hip_d_st_esalts_buf; + // API: opencl bool is_opencl; @@ -1519,9 +1603,13 @@ typedef struct backend_ctx void *ocl; void *cuda; + void *hip; + void *nvrtc; + void *hiprtc; int backend_device_from_cuda[DEVICES_MAX]; // from cuda device index to backend device index + int backend_device_from_hip[DEVICES_MAX]; // from hip device index to backend device index int backend_device_from_opencl[DEVICES_MAX]; // from opencl device index to backend device index int backend_device_from_opencl_platform[CL_PLATFORMS_MAX][DEVICES_MAX]; // from opencl device index to backend device index (by platform) @@ -1529,6 +1617,8 @@ typedef struct backend_ctx int backend_devices_active; int cuda_devices_cnt; int cuda_devices_active; + int hip_devices_cnt; + int hip_devices_active; int opencl_devices_cnt; int opencl_devices_active; @@ -1557,6 +1647,11 @@ typedef struct backend_ctx int nvrtc_driver_version; int cuda_driver_version; + // hip + + int hiprtc_driver_version; + int hip_driver_version; + // opencl cl_platform_id *opencl_platforms; @@ -1947,6 +2042,7 @@ typedef struct user_options bool markov_classic; bool markov_disable; bool backend_ignore_cuda; + bool backend_ignore_hip; bool backend_ignore_opencl; bool backend_info; bool optimized_kernel_enable; diff --git a/src/Makefile b/src/Makefile index c24414566..6a4f0e487 100644 --- a/src/Makefile +++ b/src/Makefile @@ -4,7 +4,7 @@ ## SHARED ?= 0 -DEBUG := 0 +DEBUG := 1 PRODUCTION := 1 PRODUCTION_VERSION := v6.1.1 ENABLE_CUBIN ?= 1 @@ -309,7 +309,7 @@ EMU_OBJS_ALL += emu_inc_truecrypt_crc32 emu_inc_truecrypt_keyfile emu EMU_OBJS_ALL += emu_inc_hash_md4 emu_inc_hash_md5 emu_inc_hash_ripemd160 emu_inc_hash_sha1 emu_inc_hash_sha256 emu_inc_hash_sha384 emu_inc_hash_sha512 emu_inc_hash_streebog256 emu_inc_hash_streebog512 emu_inc_ecc_secp256k1 EMU_OBJS_ALL += emu_inc_cipher_aes emu_inc_cipher_camellia emu_inc_cipher_des emu_inc_cipher_kuznyechik emu_inc_cipher_serpent emu_inc_cipher_twofish -OBJS_ALL :=
affinity autotune backend benchmark bitmap bitops combinator common convert cpt cpu_crc32 debugfile dictstat dispatch dynloader event ext_ADL ext_cuda ext_nvapi ext_nvml ext_nvrtc ext_OpenCL ext_sysfs ext_lzma filehandling folder hashcat hashes hlfmt hwmon induct interface keyboard_layout locking logfile loopback memory monitor mpsp outfile_check outfile pidfile potfile restore rp rp_cpu selftest slow_candidates shared status stdout straight terminal thread timer tuningdb usage user_options wordlist $(EMU_OBJS_ALL) +OBJS_ALL := affinity autotune backend benchmark bitmap bitops combinator common convert cpt cpu_crc32 debugfile dictstat dispatch dynloader event ext_ADL ext_cuda ext_hip ext_nvapi ext_nvml ext_nvrtc ext_hiprtc ext_OpenCL ext_sysfs ext_lzma filehandling folder hashcat hashes hlfmt hwmon induct interface keyboard_layout locking logfile loopback memory monitor mpsp outfile_check outfile pidfile potfile restore rp rp_cpu selftest slow_candidates shared status stdout straight terminal thread timer tuningdb usage user_options wordlist $(EMU_OBJS_ALL) ifeq ($(ENABLE_BRAIN),1) OBJS_ALL += brain diff --git a/src/backend.c b/src/backend.c index 58aa8094f..b51da8968 100644 --- a/src/backend.c +++ b/src/backend.c @@ -54,6 +54,8 @@ static bool is_same_device (const hc_device_param_t *src, const hc_device_param_ if ((src->is_cuda == true) && (dst->is_cuda == true)) return false; + if ((src->is_hip == true) && (dst->is_hip == true)) return false; + // But OpenCL can have aliases if ((src->is_opencl == true) && (dst->is_opencl == true)) @@ -116,7 +118,7 @@ static int backend_ctx_find_alias_devices (hashcat_ctx_t *hashcat_ctx) // this lets CUDA devices survive over OpenCL - if (alias_device->is_cuda == true) continue; + if ((alias_device->is_cuda == true) || (alias_device->is_hip == true)) continue; // this lets native OpenCL runtime survive over generic OpenCL runtime @@ -141,6 +143,7 @@ static bool is_same_device_type (const hc_device_param_t *src, const hc_device_p 
if (strcmp (src->device_name, dst->device_name) != 0) return false; if (src->is_cuda != dst->is_cuda) return false; + if (src->is_hip != dst->is_hip) return false; if (src->is_opencl != dst->is_opencl) return false; if (strcmp (src->device_name, dst->device_name) != 0) return false; @@ -779,6 +782,45 @@ int nvrtc_init (hashcat_ctx_t *hashcat_ctx) return 0; } +// HIPRTC + +int hiprtc_init (hashcat_ctx_t *hashcat_ctx) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIPRTC_PTR *hiprtc = (HIPRTC_PTR *) backend_ctx->hiprtc; + + memset (hiprtc, 0, sizeof (HIPRTC_PTR)); + + #if defined (_WIN) + hiprtc->lib = hc_dlopen ("fixme.dll"); + #elif defined (__APPLE__) + hiprtc->lib = hc_dlopen ("fixme.dylib"); + #elif defined (__CYGWIN__) + hiprtc->lib = hc_dlopen ("fixme.dll"); + #else + hiprtc->lib = hc_dlopen ("libamdhip64.so"); + + if (hiprtc->lib == NULL) hiprtc->lib = hc_dlopen ("libamdhip64.so.4"); + #endif + + if (hiprtc->lib == NULL) return -1; + + HC_LOAD_FUNC (hiprtc, hiprtcAddNameExpression, HIPRTC_HIPRTCADDNAMEEXPRESSION, HIPRTC, 1); + HC_LOAD_FUNC (hiprtc, hiprtcCompileProgram, HIPRTC_HIPRTCCOMPILEPROGRAM, HIPRTC, 1); + HC_LOAD_FUNC (hiprtc, hiprtcCreateProgram, HIPRTC_HIPRTCCREATEPROGRAM, HIPRTC, 1); + HC_LOAD_FUNC (hiprtc, hiprtcDestroyProgram, HIPRTC_HIPRTCDESTROYPROGRAM, HIPRTC, 1); + HC_LOAD_FUNC (hiprtc, hiprtcGetLoweredName, HIPRTC_HIPRTCGETLOWEREDNAME, HIPRTC, 1); + HC_LOAD_FUNC (hiprtc, hiprtcGetCode, HIPRTC_HIPRTCGETPTX, HIPRTC, 1); + HC_LOAD_FUNC (hiprtc, hiprtcGetCodeSize, HIPRTC_HIPRTCGETPTXSIZE, HIPRTC, 1); + HC_LOAD_FUNC (hiprtc, hiprtcGetProgramLog, HIPRTC_HIPRTCGETPROGRAMLOG, HIPRTC, 1); + HC_LOAD_FUNC (hiprtc, hiprtcGetProgramLogSize, HIPRTC_HIPRTCGETPROGRAMLOGSIZE, HIPRTC, 1); + HC_LOAD_FUNC (hiprtc, hiprtcGetErrorString, HIPRTC_HIPRTCGETERRORSTRING, HIPRTC, 1); + HC_LOAD_FUNC (hiprtc, hiprtcVersion, HIPRTC_HIPRTCVERSION, HIPRTC, 1); + + return 0; +} + void nvrtc_close (hashcat_ctx_t *hashcat_ctx) { backend_ctx_t *backend_ctx = 
hashcat_ctx->backend_ctx; @@ -798,6 +840,25 @@ void nvrtc_close (hashcat_ctx_t *hashcat_ctx) } } +void hiprtc_close (hashcat_ctx_t *hashcat_ctx) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIPRTC_PTR *hiprtc = (HIPRTC_PTR *) backend_ctx->hiprtc; + + if (hiprtc) + { + if (hiprtc->lib) + { + hc_dlclose (hiprtc->lib); + } + + hcfree (backend_ctx->hiprtc); + + backend_ctx->hiprtc = NULL; + } +} + int hc_nvrtcCreateProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char * const *headers, const char * const *includeNames) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -816,6 +877,24 @@ int hc_nvrtcCreateProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram *prog, const return 0; } +int hc_hiprtcCreateProgram (hashcat_ctx_t *hashcat_ctx, hiprtcProgram *prog, const char *src, const char *name, int numHeaders, const char * const *headers, const char * const *includeNames) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIPRTC_PTR *hiprtc = (HIPRTC_PTR *) backend_ctx->hiprtc; + + const hiprtcResult HIPRTC_err = hiprtc->hiprtcCreateProgram (prog, src, name, numHeaders, headers, includeNames); + + if (HIPRTC_err != HIPRTC_SUCCESS) + { + event_log_error (hashcat_ctx, "hiprtcCreateProgram(): %s", hiprtc->hiprtcGetErrorString (HIPRTC_err)); + + return -1; + } + + return 0; +} + int hc_nvrtcDestroyProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram *prog) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -834,6 +913,24 @@ int hc_nvrtcDestroyProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram *prog) return 0; } +int hc_hiprtcDestroyProgram (hashcat_ctx_t *hashcat_ctx, hiprtcProgram *prog) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIPRTC_PTR *hiprtc = (HIPRTC_PTR *) backend_ctx->hiprtc; + + const hiprtcResult HIPRTC_err = hiprtc->hiprtcDestroyProgram (prog); + + if (HIPRTC_err != HIPRTC_SUCCESS) + { + event_log_error (hashcat_ctx, 
"hiprtcDestroyProgram(): %s", hiprtc->hiprtcGetErrorString (HIPRTC_err)); + + return -1; + } + + return 0; +} + int hc_nvrtcCompileProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, int numOptions, const char * const *options) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -852,6 +949,27 @@ int hc_nvrtcCompileProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, int n return 0; } +int hc_hiprtcCompileProgram (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, int numOptions, const char * const *options) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIPRTC_PTR *hiprtc = (HIPRTC_PTR *) backend_ctx->hiprtc; +#if 0 + for(int i =0; i< numOptions; i++) + printf("Option_%d = %s\n", i, options[i]); +#endif + const hiprtcResult HIPRTC_err = hiprtc->hiprtcCompileProgram (prog, numOptions, options); + + if (HIPRTC_err != HIPRTC_SUCCESS) + { + event_log_error (hashcat_ctx, "hiprtcCompileProgram(): %s", hiprtc->hiprtcGetErrorString (HIPRTC_err)); + + return -1; + } + + return 0; +} + int hc_nvrtcGetProgramLogSize (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, size_t *logSizeRet) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -870,6 +988,24 @@ int hc_nvrtcGetProgramLogSize (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, si return 0; } +int hc_hiprtcGetProgramLogSize (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, size_t *logSizeRet) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIPRTC_PTR *hiprtc = (HIPRTC_PTR *) backend_ctx->hiprtc; + + const hiprtcResult HIPRTC_err = hiprtc->hiprtcGetProgramLogSize (prog, logSizeRet); + + if (HIPRTC_err != HIPRTC_SUCCESS) + { + event_log_error (hashcat_ctx, "hiprtcGetProgramLogSize(): %s", hiprtc->hiprtcGetErrorString (HIPRTC_err)); + + return -1; + } + + return 0; +} + int hc_nvrtcGetProgramLog (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, char *log) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -888,6 +1024,24 @@ int hc_nvrtcGetProgramLog (hashcat_ctx_t 
*hashcat_ctx, nvrtcProgram prog, char * return 0; } +int hc_hiprtcGetProgramLog (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, char *log) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIPRTC_PTR *hiprtc = (HIPRTC_PTR *) backend_ctx->hiprtc; + + const hiprtcResult HIPRTC_err = hiprtc->hiprtcGetProgramLog (prog, log); + + if (HIPRTC_err != HIPRTC_SUCCESS) + { + event_log_error (hashcat_ctx, "hiprtcGetProgramLog(): %s", hiprtc->hiprtcGetErrorString (HIPRTC_err)); + + return -1; + } + + return 0; +} + int hc_nvrtcGetPTXSize (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, size_t *ptxSizeRet) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -906,6 +1060,24 @@ int hc_nvrtcGetPTXSize (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, size_t *p return 0; } +int hc_hiprtcGetCodeSize (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, size_t *ptxSizeRet) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIPRTC_PTR *hiprtc = (HIPRTC_PTR *) backend_ctx->hiprtc; + + const hiprtcResult HIPRTC_err = hiprtc->hiprtcGetCodeSize (prog, ptxSizeRet); + + if (HIPRTC_err != HIPRTC_SUCCESS) + { + event_log_error (hashcat_ctx, "hiprtcGetCodeSize(): %s", hiprtc->hiprtcGetErrorString (HIPRTC_err)); + + return -1; + } + + return 0; +} + int hc_nvrtcGetPTX (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, char *ptx) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -924,6 +1096,24 @@ int hc_nvrtcGetPTX (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, char *ptx) return 0; } +int hc_hiprtcGetCode (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, char *ptx) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIPRTC_PTR *hiprtc = (HIPRTC_PTR *) backend_ctx->hiprtc; + + const hiprtcResult HIPRTC_err = hiprtc->hiprtcGetCode (prog, ptx); + + if (HIPRTC_err != HIPRTC_SUCCESS) + { + event_log_error (hashcat_ctx, "hiprtcGetCode(): %s", hiprtc->hiprtcGetErrorString (HIPRTC_err)); + + return -1; + } + + return 0; +} + int hc_nvrtcVersion 
(hashcat_ctx_t *hashcat_ctx, int *major, int *minor) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -942,6 +1132,24 @@ int hc_nvrtcVersion (hashcat_ctx_t *hashcat_ctx, int *major, int *minor) return 0; } +int hc_hiprtcVersion (hashcat_ctx_t *hashcat_ctx, int *major, int *minor) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIPRTC_PTR *hiprtc = (HIPRTC_PTR *) backend_ctx->hiprtc; + + const hiprtcResult HIPRTC_err = hiprtc->hiprtcVersion (major, minor); + + if (HIPRTC_err != HIPRTC_SUCCESS) + { + event_log_error (hashcat_ctx, "hiprtcVersion(): %s", hiprtc->hiprtcGetErrorString (HIPRTC_err)); + + return -1; + } + + return 0; +} + // CUDA int cuda_init (hashcat_ctx_t *hashcat_ctx) @@ -1050,6 +1258,116 @@ int cuda_init (hashcat_ctx_t *hashcat_ctx) return 0; } +// HIP + +int hip_init (hashcat_ctx_t *hashcat_ctx) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + memset (hip, 0, sizeof (HIP_PTR)); + + #if defined (_WIN) + hip->lib = hc_dlopen ("fixme.dll"); + #elif defined (__APPLE__) + hip->lib = hc_dlopen ("fixme.dylib"); + #elif defined (__CYGWIN__) + hip->lib = hc_dlopen ("fixme.dll"); + #else + hip->lib = hc_dlopen ("libamdhip64.so"); + + //TODO: grab the 4 from the major RT version + if (hip->lib == NULL) hip->lib = hc_dlopen ("libamdhip64.so.4.2.40200"); + #endif + + if (hip->lib == NULL) return -1; + + // finding the right symbol is a PITA, + #define HC_LOAD_FUNC_HIP(ptr,name,hipname,type,libname,noerr) \ + do { \ + ptr->name = (type) hc_dlsym ((ptr)->lib, #hipname); \ + if ((noerr) != -1) { \ + if (!(ptr)->name) { \ + if ((noerr) == 1) { \ + event_log_error (hashcat_ctx, "%s is missing from %s shared library.", #name, #libname); \ + return -1; \ + } \ + if ((noerr) != 1) { \ + event_log_warning (hashcat_ctx, "%s is missing from %s shared library.", #name, #libname); \ + return 0; \ + } \ + } \ + } \ + } while (0) + + // finding the right symbol is a PITA, because of the 
_v2 suffix + // a good reference is cuda.h itself + // this needs to be verified for each new cuda release + + HC_LOAD_FUNC_HIP (hip, hipCtxCreate, hipCtxCreate, HIP_HIPCTXCREATE, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipCtxDestroy, hipCtxDestroy, HIP_HIPCTXDESTROY, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipCtxGetCacheConfig, hipCtxGetCacheConfig, HIP_HIPCTXGETCACHECONFIG, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipCtxGetCurrent, hipCtxGetCurrent, HIP_HIPCTXGETCURRENT, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipCtxGetSharedMemConfig, hipCtxGetSharedMemConfig, HIP_HIPCTXGETSHAREDMEMCONFIG, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipCtxPopCurrent, hipCtxPopCurrent, HIP_HIPCTXPOPCURRENT, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipCtxPushCurrent, hipCtxPushCurrent, HIP_HIPCTXPUSHCURRENT, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipCtxSetCacheConfig, hipCtxSetCacheConfig, HIP_HIPCTXSETCACHECONFIG, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipCtxSetCurrent, hipCtxSetCurrent, HIP_HIPCTXSETCURRENT, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipCtxSetSharedMemConfig, hipCtxSetSharedMemConfig, HIP_HIPCTXSETSHAREDMEMCONFIG, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipCtxSynchronize, hipCtxSynchronize, HIP_HIPCTXSYNCHRONIZE, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipDeviceGetAttribute, hipDeviceGetAttribute, HIP_HIPDEVICEGETATTRIBUTE, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipDeviceGetCount, hipGetDeviceCount, HIP_HIPDEVICEGETCOUNT, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipDeviceGet, hipDeviceGet, HIP_HIPDEVICEGET, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipDeviceGetName, hipDeviceGetName, HIP_HIPDEVICEGETNAME, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipDeviceTotalMem, hipDeviceTotalMem, HIP_HIPDEVICETOTALMEM, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipDriverGetVersion, hipDriverGetVersion, HIP_HIPDRIVERGETVERSION, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipEventCreate, hipEventCreateWithFlags, HIP_HIPEVENTCREATE, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipEventDestroy, hipEventDestroy, HIP_HIPEVENTDESTROY, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipEventElapsedTime, hipEventElapsedTime, 
HIP_HIPEVENTELAPSEDTIME, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipEventQuery, hipEventQuery, HIP_HIPEVENTQUERY, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipEventRecord, hipEventRecord, HIP_HIPEVENTRECORD, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipEventSynchronize, hipEventSynchronize, HIP_HIPEVENTSYNCHRONIZE, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipFuncGetAttribute, hipFuncGetAttribute, HIP_HIPFUNCGETATTRIBUTE, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipFuncSetAttribute, hipFuncSetAttribute, HIP_HIPFUNCSETATTRIBUTE, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipFuncSetCacheConfig, hipFuncSetCacheConfig, HIP_HIPFUNCSETCACHECONFIG, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipFuncSetSharedMemConfig, hipFuncSetSharedMemConfig, HIP_HIPFUNCSETSHAREDMEMCONFIG, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipGetErrorName, hipGetErrorName, HIP_HIPGETERRORNAME, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipGetErrorString, hipGetErrorString, HIP_HIPGETERRORSTRING, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipInit, hipInit, HIP_HIPINIT, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipLaunchKernel, hipModuleLaunchKernel, HIP_HIPLAUNCHKERNEL, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipMemAlloc, hipMalloc, HIP_HIPMEMALLOC, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipMemAllocHost, hipMemAllocHost, HIP_HIPMEMALLOCHOST, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoD, hipMemcpyDtoD, HIP_HIPMEMCPYDTOD, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoH, hipMemcpyDtoH, HIP_HIPMEMCPYDTOH, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipMemcpyHtoD, hipMemcpyHtoD, HIP_HIPMEMCPYHTOD, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipMemFree, hipFree, HIP_HIPMEMFREE, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipMemFreeHost, hipFreeHost, HIP_HIPMEMFREEHOST, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipMemGetInfo, hipMemGetInfo, HIP_HIPMEMGETINFO, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipMemsetD32, hipMemsetD32, HIP_HIPMEMSETD32, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipMemsetD8, hipMemsetD8, HIP_HIPMEMSETD8, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipModuleGetFunction, hipModuleGetFunction, HIP_HIPMODULEGETFUNCTION, HIP, 1); + 
HC_LOAD_FUNC_HIP (hip, hipModuleGetGlobal, hipModuleGetGlobal, HIP_HIPMODULEGETGLOBAL, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipModuleLoad, hipModuleLoad, HIP_HIPMODULELOAD, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipModuleLoadData, hipModuleLoadData, HIP_HIPMODULELOADDATA, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipModuleLoadDataEx, hipModuleLoadDataEx, HIP_HIPMODULELOADDATAEX, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipModuleUnload, hipModuleUnload, HIP_HIPMODULEUNLOAD, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipProfilerStart, hipProfilerStart, HIP_HIPPROFILERSTART, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipProfilerStop, hipProfilerStop, HIP_HIPPROFILERSTOP, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipStreamCreate, hipStreamCreate, HIP_HIPSTREAMCREATE, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipStreamDestroy, hipStreamDestroy, HIP_HIPSTREAMDESTROY, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipStreamSynchronize, hipStreamSynchronize, HIP_HIPSTREAMSYNCHRONIZE, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipStreamWaitEvent, hipStreamWaitEvent, HIP_HIPSTREAMWAITEVENT, HIP, 1); + #if defined (WITH_CUBINX) + HC_LOAD_FUNC_HIP (hip, hipLinkCreate, hipLinkCreate, HIP_HIPLINKCREATE, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipLinkAddData, hipLinkAddData, HIP_HIPLINKADDDATA, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipLinkDestroy, hipLinkDestroy, HIP_HIPLINKDESTROY, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipLinkComplete, hipLinkComplete, HIP_HIPLINKCOMPLETE, HIP, 1); + #endif + + return 0; +} + void cuda_close (hashcat_ctx_t *hashcat_ctx) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1069,6 +1387,25 @@ void cuda_close (hashcat_ctx_t *hashcat_ctx) } } +void hip_close (hashcat_ctx_t *hashcat_ctx) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + if (hip) + { + if (hip->lib) + { + hc_dlclose (hip->lib); + } + + hcfree (backend_ctx->hip); + + backend_ctx->hip = NULL; + } +} + int hc_cuInit (hashcat_ctx_t *hashcat_ctx, unsigned int Flags) { backend_ctx_t *backend_ctx = 
hashcat_ctx->backend_ctx; @@ -1096,6 +1433,33 @@ int hc_cuInit (hashcat_ctx_t *hashcat_ctx, unsigned int Flags) return 0; } +int hc_hipInit (hashcat_ctx_t *hashcat_ctx, unsigned int Flags) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipInit (Flags); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipInit(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipInit(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuDeviceGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, CUdevice_attribute attrib, CUdevice dev) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1123,6 +1487,34 @@ int hc_cuDeviceGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, CUdevice_attri return 0; } +int hc_hipDeviceGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, HIPdevice_attribute attrib, HIPdevice dev) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + if(attrib == -1) return 0; + const HIPresult HIP_err = hip->hipDeviceGetAttribute (pi, attrib, dev); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipDeviceGetAttribute(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipDeviceGetAttribute(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuDeviceGetCount (hashcat_ctx_t *hashcat_ctx, int *count) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1150,6 +1542,33 @@ int hc_cuDeviceGetCount (hashcat_ctx_t *hashcat_ctx, int *count) return 0; } +int hc_hipDeviceGetCount (hashcat_ctx_t *hashcat_ctx, int *count) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const 
HIPresult HIP_err = hip->hipDeviceGetCount (count); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipDeviceGetCount(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipDeviceGetCount(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuDeviceGet (hashcat_ctx_t *hashcat_ctx, CUdevice* device, int ordinal) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1177,6 +1596,33 @@ int hc_cuDeviceGet (hashcat_ctx_t *hashcat_ctx, CUdevice* device, int ordinal) return 0; } +int hc_hipDeviceGet (hashcat_ctx_t *hashcat_ctx, HIPdevice* device, int ordinal) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipDeviceGet (device, ordinal); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipDeviceGet(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipDeviceGet(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuDeviceGetName (hashcat_ctx_t *hashcat_ctx, char *name, int len, CUdevice dev) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1204,6 +1650,33 @@ int hc_cuDeviceGetName (hashcat_ctx_t *hashcat_ctx, char *name, int len, CUdevic return 0; } +int hc_hipDeviceGetName (hashcat_ctx_t *hashcat_ctx, char *name, int len, HIPdevice dev) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipDeviceGetName (name, len, dev); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipDeviceGetName(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipDeviceGetName(): %d", 
HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuDeviceTotalMem (hashcat_ctx_t *hashcat_ctx, size_t *bytes, CUdevice dev) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1231,6 +1704,33 @@ int hc_cuDeviceTotalMem (hashcat_ctx_t *hashcat_ctx, size_t *bytes, CUdevice dev return 0; } +int hc_hipDeviceTotalMem (hashcat_ctx_t *hashcat_ctx, size_t *bytes, HIPdevice dev) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipDeviceTotalMem (bytes, dev); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipDeviceTotalMem(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipDeviceTotalMem(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuDriverGetVersion (hashcat_ctx_t *hashcat_ctx, int *driverVersion) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1258,6 +1758,33 @@ int hc_cuDriverGetVersion (hashcat_ctx_t *hashcat_ctx, int *driverVersion) return 0; } +int hc_hipDriverGetVersion (hashcat_ctx_t *hashcat_ctx, int *driverVersion) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipDriverGetVersion (driverVersion); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipDriverGetVersion(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipDriverGetVersion(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuCtxCreate (hashcat_ctx_t *hashcat_ctx, CUcontext *pctx, unsigned int flags, CUdevice dev) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1285,6 +1812,33 @@ int hc_cuCtxCreate (hashcat_ctx_t *hashcat_ctx, CUcontext *pctx, unsigned int fl return 0; } +int 
hc_hipCtxCreate (hashcat_ctx_t *hashcat_ctx, HIPcontext *pctx, unsigned int flags, HIPdevice dev) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipCtxCreate (pctx, flags, dev); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipCtxCreate(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipCtxCreate(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuCtxDestroy (hashcat_ctx_t *hashcat_ctx, CUcontext ctx) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1312,6 +1866,33 @@ int hc_cuCtxDestroy (hashcat_ctx_t *hashcat_ctx, CUcontext ctx) return 0; } +int hc_hipCtxDestroy (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipCtxDestroy (ctx); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipCtxDestroy(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipCtxDestroy(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuModuleLoadDataEx (hashcat_ctx_t *hashcat_ctx, CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1339,6 +1920,33 @@ int hc_cuModuleLoadDataEx (hashcat_ctx_t *hashcat_ctx, CUmodule *module, const v return 0; } +int hc_hipModuleLoadDataEx (hashcat_ctx_t *hashcat_ctx, HIPmodule *module, const void *image, unsigned int numOptions, HIPjit_option *options, void **optionValues) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = 
hip->hipModuleLoadDataEx (module, image, numOptions, options, optionValues); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipModuleLoadDataEx(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipModuleLoadDataEx(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuModuleUnload (hashcat_ctx_t *hashcat_ctx, CUmodule hmod) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1366,6 +1974,33 @@ int hc_cuModuleUnload (hashcat_ctx_t *hashcat_ctx, CUmodule hmod) return 0; } +int hc_hipModuleUnload (hashcat_ctx_t *hashcat_ctx, HIPmodule hmod) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipModuleUnload (hmod); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipModuleUnload(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipModuleUnload(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuCtxSetCurrent (hashcat_ctx_t *hashcat_ctx, CUcontext ctx) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1393,6 +2028,33 @@ int hc_cuCtxSetCurrent (hashcat_ctx_t *hashcat_ctx, CUcontext ctx) return 0; } +int hc_hipCtxSetCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipCtxSetCurrent (ctx); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipCtxSetCurrent(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipCtxSetCurrent(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuMemAlloc 
(hashcat_ctx_t *hashcat_ctx, CUdeviceptr *dptr, size_t bytesize) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1420,6 +2082,33 @@ int hc_cuMemAlloc (hashcat_ctx_t *hashcat_ctx, CUdeviceptr *dptr, size_t bytesiz return 0; } +int hc_hipMemAlloc (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr *dptr, size_t bytesize) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipMemAlloc (dptr, bytesize); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipMemAlloc(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipMemAlloc(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuMemFree (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dptr) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1447,6 +2136,33 @@ int hc_cuMemFree (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dptr) return 0; } +int hc_hipMemFree (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dptr) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipMemFree (dptr); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipMemFree(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipMemFree(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuMemcpyDtoH (hashcat_ctx_t *hashcat_ctx, void *dstHost, CUdeviceptr srcDevice, size_t ByteCount) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1474,6 +2190,33 @@ int hc_cuMemcpyDtoH (hashcat_ctx_t *hashcat_ctx, void *dstHost, CUdeviceptr srcD return 0; } +int hc_hipMemcpyDtoH (hashcat_ctx_t *hashcat_ctx, void *dstHost, HIPdeviceptr srcDevice, size_t ByteCount) +{ + backend_ctx_t *backend_ctx = 
hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipMemcpyDtoH (dstHost, srcDevice, ByteCount); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipMemcpyDtoH(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipMemcpyDtoH(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuMemcpyDtoD (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1501,6 +2244,33 @@ int hc_cuMemcpyDtoD (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, CUdevice return 0; } +int hc_hipMemcpyDtoD (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, HIPdeviceptr srcDevice, size_t ByteCount) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipMemcpyDtoD (dstDevice, srcDevice, ByteCount); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipMemcpyDtoD(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipMemcpyDtoD(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuMemcpyHtoD (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1528,6 +2298,33 @@ int hc_cuMemcpyHtoD (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDevice, const vo return 0; } +int hc_hipMemcpyHtoD (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, const void *srcHost, size_t ByteCount) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipMemcpyHtoD (dstDevice, srcHost, ByteCount); + + if (HIP_err != 
HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipMemcpyHtoD(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipMemcpyHtoD(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuModuleGetFunction (hashcat_ctx_t *hashcat_ctx, CUfunction *hfunc, CUmodule hmod, const char *name) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1555,6 +2352,33 @@ int hc_cuModuleGetFunction (hashcat_ctx_t *hashcat_ctx, CUfunction *hfunc, CUmod return 0; } +int hc_hipModuleGetFunction (hashcat_ctx_t *hashcat_ctx, HIPfunction *hfunc, HIPmodule hmod, const char *name) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipModuleGetFunction (hfunc, hmod, name); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipModuleGetFunction(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipModuleGetFunction(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuModuleGetGlobal (hashcat_ctx_t *hashcat_ctx, CUdeviceptr *dptr, size_t *bytes, CUmodule hmod, const char *name) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1582,6 +2406,33 @@ int hc_cuModuleGetGlobal (hashcat_ctx_t *hashcat_ctx, CUdeviceptr *dptr, size_t return 0; } +int hc_hipModuleGetGlobal (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr *dptr, size_t *bytes, HIPmodule hmod, const char *name) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipModuleGetGlobal (dptr, bytes, hmod, name); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipModuleGetGlobal(): 
%s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipModuleGetGlobal(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuMemGetInfo (hashcat_ctx_t *hashcat_ctx, size_t *free, size_t *total) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1609,6 +2460,33 @@ int hc_cuMemGetInfo (hashcat_ctx_t *hashcat_ctx, size_t *free, size_t *total) return 0; } +int hc_hipMemGetInfo (hashcat_ctx_t *hashcat_ctx, size_t *free, size_t *total) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipMemGetInfo (free, total); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipMemGetInfo(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipMemGetInfo(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, CUfunction_attribute attrib, CUfunction hfunc) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1636,6 +2514,33 @@ int hc_cuFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, CUfunction_attri return 0; } +int hc_hipFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, HIPfunction_attribute attrib, HIPfunction hfunc) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipFuncGetAttribute (pi, attrib, hfunc); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipFuncGetAttribute(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipFuncGetAttribute(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuFuncSetAttribute (hashcat_ctx_t *hashcat_ctx, CUfunction hfunc, CUfunction_attribute attrib, int value) { backend_ctx_t 
*backend_ctx = hashcat_ctx->backend_ctx; @@ -1663,6 +2568,33 @@ int hc_cuFuncSetAttribute (hashcat_ctx_t *hashcat_ctx, CUfunction hfunc, CUfunct return 0; } +int hc_hipFuncSetAttribute (hashcat_ctx_t *hashcat_ctx, HIPfunction hfunc, HIPfunction_attribute attrib, int value) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipFuncSetAttribute (hfunc, attrib, value); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipFuncSetAttribute(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipFuncSetAttribute(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuStreamCreate (hashcat_ctx_t *hashcat_ctx, CUstream *phStream, unsigned int Flags) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1690,6 +2622,33 @@ int hc_cuStreamCreate (hashcat_ctx_t *hashcat_ctx, CUstream *phStream, unsigned return 0; } +int hc_hipStreamCreate (hashcat_ctx_t *hashcat_ctx, HIPstream *phStream, unsigned int Flags) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipStreamCreate (phStream, Flags); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipStreamCreate(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipStreamCreate(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuStreamDestroy (hashcat_ctx_t *hashcat_ctx, CUstream hStream) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1717,6 +2676,33 @@ int hc_cuStreamDestroy (hashcat_ctx_t *hashcat_ctx, CUstream hStream) return 0; } +int hc_hipStreamDestroy (hashcat_ctx_t *hashcat_ctx, HIPstream hStream) +{ + backend_ctx_t *backend_ctx = 
hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipStreamDestroy (hStream); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipStreamDestroy(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipStreamDestroy(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuStreamSynchronize (hashcat_ctx_t *hashcat_ctx, CUstream hStream) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1744,6 +2730,33 @@ int hc_cuStreamSynchronize (hashcat_ctx_t *hashcat_ctx, CUstream hStream) return 0; } +int hc_hipStreamSynchronize (hashcat_ctx_t *hashcat_ctx, HIPstream hStream) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipStreamSynchronize (hStream); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipStreamSynchronize(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipStreamSynchronize(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuLaunchKernel (hashcat_ctx_t *hashcat_ctx, CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1771,6 +2784,33 @@ int hc_cuLaunchKernel (hashcat_ctx_t *hashcat_ctx, CUfunction f, unsigned int gr return 0; } +int hc_hipLaunchKernel (hashcat_ctx_t *hashcat_ctx, HIPfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, HIPstream hStream, 
void **kernelParams, void **extra) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipLaunchKernel (f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipLaunchKernel(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipLaunchKernel(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuCtxSynchronize (hashcat_ctx_t *hashcat_ctx) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1798,6 +2838,33 @@ int hc_cuCtxSynchronize (hashcat_ctx_t *hashcat_ctx) return 0; } +int hc_hipCtxSynchronize (hashcat_ctx_t *hashcat_ctx) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipCtxSynchronize (); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipCtxSynchronize(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipCtxSynchronize(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuEventCreate (hashcat_ctx_t *hashcat_ctx, CUevent *phEvent, unsigned int Flags) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1825,6 +2892,33 @@ int hc_cuEventCreate (hashcat_ctx_t *hashcat_ctx, CUevent *phEvent, unsigned int return 0; } +int hc_hipEventCreate (hashcat_ctx_t *hashcat_ctx, HIPevent *phEvent, unsigned int Flags) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipEventCreate (phEvent, Flags); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString 
(HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipEventCreate(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipEventCreate(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuEventDestroy (hashcat_ctx_t *hashcat_ctx, CUevent hEvent) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1852,6 +2946,33 @@ int hc_cuEventDestroy (hashcat_ctx_t *hashcat_ctx, CUevent hEvent) return 0; } +int hc_hipEventDestroy (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipEventDestroy (hEvent); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipEventDestroy(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipEventDestroy(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuEventElapsedTime (hashcat_ctx_t *hashcat_ctx, float *pMilliseconds, CUevent hStart, CUevent hEnd) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1879,6 +3000,33 @@ int hc_cuEventElapsedTime (hashcat_ctx_t *hashcat_ctx, float *pMilliseconds, CUe return 0; } +int hc_hipEventElapsedTime (hashcat_ctx_t *hashcat_ctx, float *pMilliseconds, HIPevent hStart, HIPevent hEnd) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipEventElapsedTime (pMilliseconds, hStart, hEnd); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipEventElapsedTime(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipEventElapsedTime(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuEventQuery (hashcat_ctx_t *hashcat_ctx, CUevent hEvent) { 
backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1906,6 +3054,33 @@ int hc_cuEventQuery (hashcat_ctx_t *hashcat_ctx, CUevent hEvent) return 0; } +int hc_hipEventQuery (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipEventQuery (hEvent); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipEventQuery(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipEventQuery(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuEventRecord (hashcat_ctx_t *hashcat_ctx, CUevent hEvent, CUstream hStream) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1933,6 +3108,33 @@ int hc_cuEventRecord (hashcat_ctx_t *hashcat_ctx, CUevent hEvent, CUstream hStre return 0; } +int hc_hipEventRecord (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent, HIPstream hStream) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipEventRecord (hEvent, hStream); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipEventRecord(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipEventRecord(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuEventSynchronize (hashcat_ctx_t *hashcat_ctx, CUevent hEvent) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1960,6 +3162,33 @@ int hc_cuEventSynchronize (hashcat_ctx_t *hashcat_ctx, CUevent hEvent) return 0; } +int hc_hipEventSynchronize (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = 
hip->hipEventSynchronize (hEvent); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipEventSynchronize(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipEventSynchronize(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuCtxSetCacheConfig (hashcat_ctx_t *hashcat_ctx, CUfunc_cache config) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -1987,6 +3216,33 @@ int hc_cuCtxSetCacheConfig (hashcat_ctx_t *hashcat_ctx, CUfunc_cache config) return 0; } +int hc_hipCtxSetCacheConfig (hashcat_ctx_t *hashcat_ctx, HIPfunc_cache config) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipCtxSetCacheConfig (config); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipCtxSetCacheConfig(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipCtxSetCacheConfig(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuCtxPushCurrent (hashcat_ctx_t *hashcat_ctx, CUcontext ctx) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -2014,6 +3270,33 @@ int hc_cuCtxPushCurrent (hashcat_ctx_t *hashcat_ctx, CUcontext ctx) return 0; } +int hc_hipCtxPushCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipCtxPushCurrent (ctx); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipCtxPushCurrent(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipCtxPushCurrent(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int 
hc_cuCtxPopCurrent (hashcat_ctx_t *hashcat_ctx, CUcontext *pctx) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -2041,6 +3324,33 @@ int hc_cuCtxPopCurrent (hashcat_ctx_t *hashcat_ctx, CUcontext *pctx) return 0; } +int hc_hipCtxPopCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext *pctx) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipCtxPopCurrent (pctx); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipCtxPopCurrent(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipCtxPopCurrent(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuLinkCreate (hashcat_ctx_t *hashcat_ctx, unsigned int numOptions, CUjit_option *options, void **optionValues, CUlinkState *stateOut) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -2068,6 +3378,33 @@ int hc_cuLinkCreate (hashcat_ctx_t *hashcat_ctx, unsigned int numOptions, CUjit_ return 0; } +int hc_hipLinkCreate (hashcat_ctx_t *hashcat_ctx, unsigned int numOptions, HIPjit_option *options, void **optionValues, HIPlinkState *stateOut) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipLinkCreate (numOptions, options, optionValues, stateOut); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipLinkCreate(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipLinkCreate(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuLinkAddData (hashcat_ctx_t *hashcat_ctx, CUlinkState state, CUjitInputType type, void *data, size_t size, const char *name, unsigned int numOptions, CUjit_option *options, void **optionValues) { backend_ctx_t *backend_ctx = 
hashcat_ctx->backend_ctx; @@ -2095,6 +3432,33 @@ int hc_cuLinkAddData (hashcat_ctx_t *hashcat_ctx, CUlinkState state, CUjitInputT return 0; } +int hc_hipLinkAddData (hashcat_ctx_t *hashcat_ctx, HIPlinkState state, HIPjitInputType type, void *data, size_t size, const char *name, unsigned int numOptions, HIPjit_option *options, void **optionValues) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipLinkAddData (state, type, data, size, name, numOptions, options, optionValues); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipLinkAddData(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipLinkAddData(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuLinkDestroy (hashcat_ctx_t *hashcat_ctx, CUlinkState state) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -2122,6 +3486,33 @@ int hc_cuLinkDestroy (hashcat_ctx_t *hashcat_ctx, CUlinkState state) return 0; } +int hc_hipLinkDestroy (hashcat_ctx_t *hashcat_ctx, HIPlinkState state) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipLinkDestroy (state); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipLinkDestroy(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipLinkDestroy(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + int hc_cuLinkComplete (hashcat_ctx_t *hashcat_ctx, CUlinkState state, void **cubinOut, size_t *sizeOut) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -2149,6 +3540,33 @@ int hc_cuLinkComplete (hashcat_ctx_t *hashcat_ctx, CUlinkState state, void **cub return 0; } +int hc_hipLinkComplete (hashcat_ctx_t 
*hashcat_ctx, HIPlinkState state, void **hipbinOut, size_t *sizeOut) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipLinkComplete (state, hipbinOut, sizeOut); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipLinkComplete(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipLinkComplete(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + // OpenCL int ocl_init (hashcat_ctx_t *hashcat_ctx) @@ -2839,6 +4257,15 @@ int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, c if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return -1; + + if (hc_hipMemcpyDtoH (hashcat_ctx, &pw_idx, device_param->hip_d_pws_idx + (gidd * sizeof (pw_idx_t)), sizeof (pw_idx_t)) == -1) return -1; + + if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, gidd * sizeof (pw_idx_t), sizeof (pw_idx_t), &pw_idx, 0, NULL, NULL) == -1) return -1; @@ -2860,6 +4287,18 @@ int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, c } } + if (device_param->is_hip == true) + { + if (cnt > 0) + { + if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return -1; + + if (hc_hipMemcpyDtoH (hashcat_ctx,pw->i, device_param->hip_d_pws_comp_buf + (off * sizeof (u32)), cnt * sizeof (u32)) == -1) return -1; + + if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return -1; + } + } + if (device_param->is_opencl == true) { if (cnt > 0) @@ -2909,6 +4348,11 @@ int 
choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_tm_c, size_tm) == -1) return -1; } + if (device_param->is_hip == true) + { + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_tm_c, size_tm) == -1) return -1; + } + if (device_param->is_opencl == true) { if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_tm_c, size_tm) == -1) return -1; @@ -2921,6 +4365,11 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_bfs_c, device_param->cuda_d_tm_c, size_tm) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_bfs_c, device_param->hip_d_tm_c, size_tm) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_tm_c, device_param->opencl_d_bfs_c, 0, 0, size_tm, 0, NULL, NULL) == -1) return -1; @@ -2962,6 +4411,11 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_pws_buf, device_param->cuda_d_pws_amp_buf, pws_cnt * sizeof (pw_t)) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_pws_buf, device_param->hip_d_pws_amp_buf, pws_cnt * sizeof (pw_t)) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_amp_buf, device_param->opencl_d_pws_buf, 0, 0, pws_cnt * sizeof (pw_t), 0, NULL, NULL) == -1) return -1; @@ -2986,6 +4440,11 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (hc_cuMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->cuda_d_hooks, pws_cnt * 
hashconfig->hook_size) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->hip_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, pws_cnt * hashconfig->hook_size, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1; @@ -3034,6 +4493,11 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_hooks, device_param->hooks_buf, pws_cnt * hashconfig->hook_size) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_hooks, device_param->hooks_buf, pws_cnt * hashconfig->hook_size) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, pws_cnt * hashconfig->hook_size, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1; @@ -3107,6 +4571,11 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (hc_cuMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->cuda_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->hip_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, pws_cnt * hashconfig->hook_size, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1; @@ -3155,6 +4624,11 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_hooks, 
device_param->hooks_buf, pws_cnt * hashconfig->hook_size) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_hooks, device_param->hooks_buf, pws_cnt * hashconfig->hook_size) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, pws_cnt * hashconfig->hook_size, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1; @@ -3231,6 +4705,11 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_tmps, device_param->size_tmps) == -1) return -1; } + if (device_param->is_hip == true) + { + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_tmps, device_param->size_tmps) == -1) return -1; + } + if (device_param->is_opencl == true) { if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_tmps, device_param->size_tmps) == -1) return -1; @@ -3244,6 +4723,11 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1; } + if (device_param->is_hip == true) + { + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1; + } + if (device_param->is_opencl == true) { if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1; @@ -3325,6 +4809,26 @@ int run_cuda_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *devic return 0; } +int run_hip_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 num) +{ + u64 num_elements = num; + + device_param->kernel_params_atinit[0] = (void *) &buf; 
+ device_param->kernel_params_atinit_buf64[1] = num_elements; + + const u64 kernel_threads = device_param->kernel_wgs_atinit; + + num_elements = CEILDIV (num_elements, kernel_threads); + + HIPfunction function = device_param->hip_function_atinit; + + if (hc_hipLaunchKernel (hashcat_ctx, function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_atinit, NULL) == -1) return -1; + + if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1; + + return 0; +} + int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u32 value, const u64 size) { const u64 num16d = size / 16; @@ -3373,11 +4877,64 @@ int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *devic return 0; } +int run_hip_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u32 value, const u64 size) +{ + const u64 num16d = size / 16; + const u64 num16m = size % 16; + + if (num16d) + { + device_param->kernel_params_memset[0] = (void *) &buf; + device_param->kernel_params_memset_buf32[1] = value; + device_param->kernel_params_memset_buf64[2] = num16d; + + const u64 kernel_threads = device_param->kernel_wgs_memset; + + u64 num_elements = num16d; + + num_elements = CEILDIV (num_elements, kernel_threads); + + HIPfunction function = device_param->hip_function_memset; + + //HIP_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 0, sizeof (cl_mem), (void *) &buf); if (HIP_rc == -1) return -1; + //HIP_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 1, sizeof (cl_uint), device_param->kernel_params_memset[1]); if (HIP_rc == -1) return -1; + //HIP_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 2, sizeof (cl_ulong), device_param->kernel_params_memset[2]); if (HIP_rc == -1) return -1; + + //const size_t global_work_size[3] = { num_elements, 1, 1 }; + //const size_t local_work_size[3] = { kernel_threads, 1, 1 }; + + if (hc_hipLaunchKernel 
(hashcat_ctx, function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_memset, NULL) == -1) return -1; + + if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1; + } + + if (num16m) + { + u32 tmp[4]; + + tmp[0] = value; + tmp[1] = value; + tmp[2] = value; + tmp[3] = value; + + // Apparently are allowed to do this: https://devtalk.nvidia.com/default/topic/761515/how-to-copy-to-device-memory-with-offset-/ + + if (hc_hipMemcpyHtoD (hashcat_ctx, buf + (num16d * 16), tmp, num16m) == -1) return -1; + } + + return 0; +} + int run_cuda_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 size) { return run_cuda_kernel_memset (hashcat_ctx, device_param, buf, 0, size); } +int run_hip_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 size) +{ + return run_hip_kernel_memset (hashcat_ctx, device_param, buf, 0, size); +} + int run_opencl_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num) { u64 num_elements = num; @@ -3539,6 +5096,14 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con } } + if (device_param->is_hip == true) + { + if ((device_param->kernel_dynamic_local_mem_size_memset % device_param->device_local_mem_size) == 0) + { + dynamic_shared_mem = 0; + } + } + kernel_threads = MIN (kernel_threads, device_param->kernel_threads); device_param->kernel_params_buf64[34] = num; @@ -3628,6 +5193,95 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con } } + /* + * HIP + */ + if (device_param->is_hip == true) + { + HIPfunction hip_function = NULL; + + if (device_param->is_hip == true) + { + switch (kern_run) + { + case KERN_RUN_1: hip_function = device_param->hip_function1; break; + case KERN_RUN_12: hip_function = device_param->hip_function12; break; + case KERN_RUN_2: hip_function = 
device_param->hip_function2; break; + case KERN_RUN_2E: hip_function = device_param->hip_function2e; break; + case KERN_RUN_23: hip_function = device_param->hip_function23; break; + case KERN_RUN_3: hip_function = device_param->hip_function3; break; + case KERN_RUN_4: hip_function = device_param->hip_function4; break; + case KERN_RUN_INIT2: hip_function = device_param->hip_function_init2; break; + case KERN_RUN_LOOP2: hip_function = device_param->hip_function_loop2; break; + case KERN_RUN_AUX1: hip_function = device_param->hip_function_aux1; break; + case KERN_RUN_AUX2: hip_function = device_param->hip_function_aux2; break; + case KERN_RUN_AUX3: hip_function = device_param->hip_function_aux3; break; + case KERN_RUN_AUX4: hip_function = device_param->hip_function_aux4; break; + } + + if (hc_hipFuncSetAttribute (hashcat_ctx, hip_function, HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, dynamic_shared_mem) == -1) return -1; + } + + if (kernel_threads == 0) kernel_threads = 1; + + num_elements = CEILDIV (num_elements, kernel_threads); + + if (kern_run == KERN_RUN_1) + { + if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_INIT) + { + num_elements = CEILDIV (num_elements, device_param->vector_width); + } + } + else if (kern_run == KERN_RUN_2) + { + if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_LOOP) + { + num_elements = CEILDIV (num_elements, device_param->vector_width); + } + } + else if (kern_run == KERN_RUN_3) + { + if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_COMP) + { + num_elements = CEILDIV (num_elements, device_param->vector_width); + } + } + + if (hc_hipEventRecord (hashcat_ctx, device_param->hip_event1, device_param->hip_stream) == -1) return -1; + + if (hc_hipLaunchKernel (hashcat_ctx, hip_function, num_elements, 1, 1, kernel_threads, 1, 1, dynamic_shared_mem, device_param->hip_stream, device_param->kernel_params, NULL) == -1) return -1; + + if (hc_hipEventRecord (hashcat_ctx, device_param->hip_event2, device_param->hip_stream) == -1) 
return -1; + + if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1; + + if (hc_hipEventSynchronize (hashcat_ctx, device_param->hip_event2) == -1) return -1; + + float exec_ms; + + if (hc_hipEventElapsedTime (hashcat_ctx, &exec_ms, device_param->hip_event1, device_param->hip_event2) == -1) return -1; + + if (event_update) + { + u32 exec_pos = device_param->exec_pos; + + device_param->exec_msec[exec_pos] = exec_ms; + + exec_pos++; + + if (exec_pos == EXEC_CACHE) + { + exec_pos = 0; + } + + device_param->exec_pos = exec_pos; + } + } + + /* + * OCL + */ if (device_param->is_opencl == true) { cl_kernel opencl_kernel = NULL; @@ -3851,6 +5505,38 @@ int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1; } + /* + * HIP + */ + if (device_param->is_hip == true) + { + HIPfunction hip_function = NULL; + + void **hip_args = NULL; + + switch (kern_run) + { + case KERN_RUN_MP: hip_function = device_param->hip_function_mp; + hip_args = device_param->kernel_params_mp; + break; + case KERN_RUN_MP_R: hip_function = device_param->hip_function_mp_r; + hip_args = device_param->kernel_params_mp_r; + break; + case KERN_RUN_MP_L: hip_function = device_param->hip_function_mp_l; + hip_args = device_param->kernel_params_mp_l; + break; + } + + num_elements = CEILDIV (num_elements, kernel_threads); + + if (hc_hipLaunchKernel (hashcat_ctx, hip_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, hip_args, NULL) == -1) return -1; + + if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1; + } + + /* + * OCL + */ if (device_param->is_opencl == true) { cl_kernel opencl_kernel = NULL; @@ -3918,6 +5604,15 @@ int run_kernel_tm (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param) if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1; } + if (device_param->is_hip == 
true) + { + HIPfunction hip_function = device_param->hip_function_tm; + + if (hc_hipLaunchKernel (hashcat_ctx, hip_function, num_elements / kernel_threads, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_tm, NULL) == -1) return -1; + + if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1; + } + if (device_param->is_opencl == true) { cl_kernel cuda_kernel = device_param->opencl_kernel_tm; @@ -3954,6 +5649,17 @@ int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1; } + if (device_param->is_hip == true) + { + num_elements = CEILDIV (num_elements, kernel_threads); + + HIPfunction hip_function = device_param->hip_function_amp; + + if (hc_hipLaunchKernel (hashcat_ctx, hip_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_amp, NULL) == -1) return -1; + + if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1; + } + if (device_param->is_opencl == true) { num_elements = round_up_multiple_64 (num_elements, kernel_threads); @@ -3994,6 +5700,17 @@ int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1; } + if (device_param->is_hip == true) + { + num_elements = CEILDIV (num_elements, kernel_threads); + + HIPfunction hip_function = device_param->hip_function_decompress; + + if (hc_hipLaunchKernel (hashcat_ctx, hip_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_decompress, NULL) == -1) return -1; + + if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1; + } + if (device_param->is_opencl == true) { num_elements = round_up_multiple_64 (num_elements, kernel_threads); @@ -4052,6 +5769,20 @@ int run_copy 
(hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const } } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1; + + const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt; + + const u32 off = pw_idx->off; + + if (off) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32)) == -1) return -1; + } + } + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1; @@ -4086,6 +5817,20 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const } } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1; + + const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt; + + const u32 off = pw_idx->off; + + if (off) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32)) == -1) return -1; + } + } + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1; @@ -4154,6 +5899,20 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const } } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1; + + const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt; + + const u32 off = pw_idx->off; + + if (off) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, 
device_param->hip_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32)) == -1) return -1; + } + } + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1; @@ -4188,6 +5947,20 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const } } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1; + + const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt; + + const u32 off = pw_idx->off; + + if (off) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32)) == -1) return -1; + } + } + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1; @@ -4220,6 +5993,20 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const } } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1; + + const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt; + + const u32 off = pw_idx->off; + + if (off) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32)) == -1) return -1; + } + } + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1; @@ -4427,6 +6214,11 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, 
hc_device_param_t *device_param, co if (hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_rules_c, device_param->cuda_d_rules + (innerloop_pos * sizeof (kernel_rule_t)), innerloop_left * sizeof (kernel_rule_t)) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_rules_c, device_param->hip_d_rules + (innerloop_pos * sizeof (kernel_rule_t)), innerloop_left * sizeof (kernel_rule_t)) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_rules, device_param->opencl_d_rules_c, innerloop_pos * sizeof (kernel_rule_t), 0, innerloop_left * sizeof (kernel_rule_t), 0, NULL, NULL) == -1) return -1; @@ -4537,6 +6329,11 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_combs_c, device_param->combs_buf, innerloop_left * sizeof (pw_t)) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_combs_c, device_param->combs_buf, innerloop_left * sizeof (pw_t)) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_TRUE, 0, innerloop_left * sizeof (pw_t), device_param->combs_buf, 0, NULL, NULL) == -1) return -1; @@ -4555,6 +6352,11 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co if (hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_combs_c, device_param->cuda_d_combs, innerloop_left * sizeof (pw_t)) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_combs_c, device_param->hip_d_combs, innerloop_left * sizeof (pw_t)) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueCopyBuffer (hashcat_ctx, 
device_param->opencl_command_queue, device_param->opencl_d_combs, device_param->opencl_d_combs_c, 0, 0, innerloop_left * sizeof (pw_t), 0, NULL, NULL) == -1) return -1; @@ -4573,6 +6375,11 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co if (hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_combs_c, device_param->cuda_d_combs, innerloop_left * sizeof (pw_t)) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_combs_c, device_param->hip_d_combs, innerloop_left * sizeof (pw_t)) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs, device_param->opencl_d_combs_c, 0, 0, innerloop_left * sizeof (pw_t), 0, NULL, NULL) == -1) return -1; @@ -4684,6 +6491,11 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_combs_c, device_param->combs_buf, innerloop_left * sizeof (pw_t)) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_combs_c, device_param->combs_buf, innerloop_left * sizeof (pw_t)) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_TRUE, 0, innerloop_left * sizeof (pw_t), device_param->combs_buf, 0, NULL, NULL) == -1) return -1; @@ -4702,6 +6514,11 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co if (hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_combs_c, device_param->cuda_d_combs, innerloop_left * sizeof (pw_t)) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_combs_c, device_param->hip_d_combs, innerloop_left * sizeof (pw_t)) == -1) return -1; + } + if 
(device_param->is_opencl == true) { if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs, device_param->opencl_d_combs_c, 0, 0, innerloop_left * sizeof (pw_t), 0, NULL, NULL) == -1) return -1; @@ -4722,6 +6539,11 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co if (hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_bfs_c, device_param->cuda_d_bfs, innerloop_left * sizeof (bf_t)) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_bfs_c, device_param->hip_d_bfs, innerloop_left * sizeof (bf_t)) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bfs, device_param->opencl_d_bfs_c, 0, 0, innerloop_left * sizeof (bf_t), 0, NULL, NULL) == -1) return -1; @@ -4985,6 +6807,94 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx) } } +/** + * Load and map HIP library calls, then init HIP + */ + + int rc_hip_init = -1; + + if (user_options->backend_ignore_hip == false) + { + HIP_PTR *hip = (HIP_PTR *) hcmalloc (sizeof (HIP_PTR)); + + backend_ctx->hip = hip; + + rc_hip_init = hip_init (hashcat_ctx); + if (rc_hip_init == -1) + { + hip_close (hashcat_ctx); + } + + /** + * Load and map HIPRTC library calls + */ + + HIPRTC_PTR *hiprtc = (HIPRTC_PTR *) hcmalloc (sizeof (HIPRTC_PTR)); + + backend_ctx->hiprtc = hiprtc; + + int rc_hiprtc_init = hiprtc_init (hashcat_ctx); + + if (rc_hiprtc_init == -1) + { + hiprtc_close (hashcat_ctx); + } + + /** + * Check if both HIP and HIPRTC were load successful + */ + + if ((rc_hip_init == 0) && (rc_hiprtc_init == 0)) + { + // hiprtc version + + int hiprtc_major = 0; + int hiprtc_minor = 0; + + if (hc_hiprtcVersion (hashcat_ctx, &hiprtc_major, &hiprtc_minor) == -1) return -1; + + int hiprtc_driver_version = (hiprtc_major * 1000) + (hiprtc_minor * 10); + + backend_ctx->hiprtc_driver_version 
= hiprtc_driver_version; + + if (hiprtc_driver_version < 9000) + { + event_log_error (hashcat_ctx, "Outdated AMD HIPRTC driver version '%d' detected!", hiprtc_driver_version); + + event_log_warning (hashcat_ctx, "See hashcat.net for officially supported AMD HIP versions."); + event_log_warning (hashcat_ctx, NULL); + + return -1; + } + + // hip version + + int hip_driver_version = 10000; + + //if (hc_hipDriverGetVersion (hashcat_ctx, &hip_driver_version) == -1) return -1; + + backend_ctx->hip_driver_version = hip_driver_version; + + if (hip_driver_version < 9000) + { + event_log_error (hashcat_ctx, "Outdated AMD HIP driver version '%d' detected!", hip_driver_version); + + event_log_warning (hashcat_ctx, "See hashcat.net for officially supported AMD HIP versions."); + event_log_warning (hashcat_ctx, NULL); + + return -1; + } + } + else + { + rc_hip_init = -1; + rc_hiprtc_init = -1; + + hip_close (hashcat_ctx); + hiprtc_close (hashcat_ctx); + } + } + /** * Load and map OpenCL library calls */ @@ -5005,14 +6915,14 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx) } /** - * return if both CUDA and OpenCL initialization failed + * return if CUDA, HIP and OpenCL initialization failed */ - if ((rc_cuda_init == -1) && (rc_ocl_init == -1)) + if ((rc_hip_init == -1) && (rc_cuda_init == -1) && (rc_ocl_init == -1)) { - event_log_error (hashcat_ctx, "ATTENTION! No OpenCL or CUDA installation found."); + event_log_error (hashcat_ctx, "ATTENTION! 
No OpenCL, CUDA or HIP installation found."); - event_log_warning (hashcat_ctx, "You are probably missing the CUDA or OpenCL runtime installation."); + event_log_warning (hashcat_ctx, "You are probably missing the CUDA, HIP or OpenCL runtime installation."); event_log_warning (hashcat_ctx, NULL); #if defined (__linux__) @@ -5073,6 +6983,18 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx) } } + /** + * HIP API: init + */ + + if (backend_ctx->hip) + { + if (hc_hipInit (hashcat_ctx, 0) == -1) + { + hip_close (hashcat_ctx); + } + } + /** * OpenCL API: init */ @@ -5306,11 +7228,11 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx) * Final checks */ - if ((backend_ctx->cuda == NULL) && (backend_ctx->ocl == NULL)) + if ((backend_ctx->hip == NULL) && (backend_ctx->cuda == NULL) && (backend_ctx->ocl == NULL)) { - event_log_error (hashcat_ctx, "ATTENTION! No OpenCL-compatible or CUDA-compatible platform found."); + event_log_error (hashcat_ctx, "ATTENTION! No OpenCL-compatible, CUDA-compatible or HIP-compatible platform found."); - event_log_warning (hashcat_ctx, "You are probably missing the OpenCL or CUDA runtime installation."); + event_log_warning (hashcat_ctx, "You are probably missing the OpenCL, CUDA or HIP runtime installation."); event_log_warning (hashcat_ctx, NULL); #if defined (__linux__) @@ -5358,6 +7280,8 @@ void backend_ctx_destroy (hashcat_ctx_t *hashcat_ctx) nvrtc_close (hashcat_ctx); cuda_close (hashcat_ctx); + hiprtc_close (hashcat_ctx); + hip_close (hashcat_ctx); ocl_close (hashcat_ctx); memset (backend_ctx, 0, sizeof (backend_ctx_t)); @@ -5693,6 +7617,323 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) backend_ctx->cuda_devices_cnt = cuda_devices_cnt; backend_ctx->cuda_devices_active = cuda_devices_active; + /* + * HIP + */ + + int hip_devices_cnt = 0; + int hip_devices_active = 0; + if (backend_ctx->hip) + { + // device count + + if (hc_hipDeviceGetCount (hashcat_ctx, &hip_devices_cnt) == -1) + { + hip_close 
(hashcat_ctx); + } + + backend_ctx->hip_devices_cnt = hip_devices_cnt; + + // device specific + + for (int hip_devices_idx = 0; hip_devices_idx < hip_devices_cnt; hip_devices_idx++, backend_devices_idx++) + { + const u32 device_id = backend_devices_idx; + + hc_device_param_t *device_param = &devices_param[backend_devices_idx]; + + device_param->device_id = device_id; + + backend_ctx->backend_device_from_hip[hip_devices_idx] = backend_devices_idx; + + HIPdevice hip_device; + + if (hc_hipDeviceGet (hashcat_ctx, &hip_device, hip_devices_idx) == -1) return -1; + + device_param->hip_device = hip_device; + + device_param->is_hip = true; + + device_param->is_opencl = false; + + device_param->use_opencl12 = false; + device_param->use_opencl20 = false; + device_param->use_opencl21 = false; + + // device_name + + char *device_name = (char *) hcmalloc (HCBUFSIZ_TINY); + + if (hc_hipDeviceGetName (hashcat_ctx, device_name, HCBUFSIZ_TINY, hip_device) == -1) return -1; + + device_param->device_name = device_name; + + hc_string_trim_leading (device_name); + + hc_string_trim_trailing (device_name); + + // device_processors + + int device_processors = 0; + + if (hc_hipDeviceGetAttribute (hashcat_ctx, &device_processors, HIP_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, hip_device) == -1) return -1; + + device_param->device_processors = device_processors; + + // device_global_mem, device_maxmem_alloc, device_available_mem + + size_t bytes = 0; + + if (hc_hipDeviceTotalMem (hashcat_ctx, &bytes, hip_device) == -1) return -1; + + device_param->device_global_mem = (u64) bytes; + + device_param->device_maxmem_alloc = (u64) bytes; + + device_param->device_available_mem = 0; + + // warp size + + int hip_warp_size = 0; + + if (hc_hipDeviceGetAttribute (hashcat_ctx, &hip_warp_size, HIP_DEVICE_ATTRIBUTE_WARP_SIZE, hip_device) == -1) return -1; + + device_param->hip_warp_size = hip_warp_size; + + // sm_minor, sm_major + + int sm_major = 0; + int sm_minor = 0; + + if (hc_hipDeviceGetAttribute 
(hashcat_ctx, &sm_major, HIP_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hip_device) == -1) return -1; + + if (hc_hipDeviceGetAttribute (hashcat_ctx, &sm_minor, HIP_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, hip_device) == -1) return -1; + + device_param->sm_major = sm_major; + device_param->sm_minor = sm_minor; + + // device_maxworkgroup_size + + int device_maxworkgroup_size = 0; + + if (hc_hipDeviceGetAttribute (hashcat_ctx, &device_maxworkgroup_size, HIP_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, hip_device) == -1) return -1; + + device_param->device_maxworkgroup_size = device_maxworkgroup_size; + + // max_clock_frequency + + int device_maxclock_frequency = 0; + + if (hc_hipDeviceGetAttribute (hashcat_ctx, &device_maxclock_frequency, HIP_DEVICE_ATTRIBUTE_CLOCK_RATE, hip_device) == -1) return -1; + + device_param->device_maxclock_frequency = device_maxclock_frequency / 1000; + + // pcie_bus, pcie_device, pcie_function + + int pci_domain_id_nv = 0; + int pci_bus_id_nv = 0; + int pci_slot_id_nv = 0; + + if (hc_hipDeviceGetAttribute (hashcat_ctx, &pci_domain_id_nv, HIP_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, hip_device) == -1) return -1; + + if (hc_hipDeviceGetAttribute (hashcat_ctx, &pci_bus_id_nv, HIP_DEVICE_ATTRIBUTE_PCI_BUS_ID, hip_device) == -1) return -1; + + if (hc_hipDeviceGetAttribute (hashcat_ctx, &pci_slot_id_nv, HIP_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, hip_device) == -1) return -1; + + device_param->pcie_domain = (u8) (pci_domain_id_nv); + device_param->pcie_bus = (u8) (pci_bus_id_nv); + device_param->pcie_device = (u8) (pci_slot_id_nv >> 3); + device_param->pcie_function = (u8) (pci_slot_id_nv & 7); + + // kernel_exec_timeout + + int kernel_exec_timeout = 0; + + if (hc_hipDeviceGetAttribute (hashcat_ctx, &kernel_exec_timeout, HIP_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, hip_device) == -1) return -1; + + device_param->kernel_exec_timeout = kernel_exec_timeout; + + // max_shared_memory_per_block + + int max_shared_memory_per_block = 0; + + if (hc_hipDeviceGetAttribute 
(hashcat_ctx, &max_shared_memory_per_block, HIP_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, hip_device) == -1) return -1; + + if (max_shared_memory_per_block < 32768) + { + event_log_error (hashcat_ctx, "* Device #%u: This device's shared buffer size is too small.", device_id + 1); + + device_param->skipped = true; + } + + device_param->device_local_mem_size = max_shared_memory_per_block; + + // device_max_constant_buffer_size + + int device_max_constant_buffer_size = 0; + + if (hc_hipDeviceGetAttribute (hashcat_ctx, &device_max_constant_buffer_size, HIP_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, hip_device) == -1) return -1; + device_max_constant_buffer_size = 65536; + if (device_max_constant_buffer_size < 65536) + { + event_log_error (hashcat_ctx, "* Device #%u: This device's local mem size is too small.", device_id + 1); + + device_param->skipped = true; + } + + // some attributes have to be hardcoded because they are used for instance in the build options + + device_param->device_local_mem_type = CL_LOCAL; + device_param->opencl_device_type = CL_DEVICE_TYPE_GPU; + device_param->opencl_device_vendor_id = VENDOR_ID_NV; + device_param->opencl_platform_vendor_id = VENDOR_ID_NV; + + // or in the cached kernel checksum + + device_param->opencl_device_version = ""; + device_param->opencl_driver_version = ""; + + // or just to make sure they are not NULL + + device_param->opencl_device_vendor = ""; + device_param->opencl_device_c_version = ""; + + // skipped + + if ((backend_ctx->backend_devices_filter & (1ULL << device_id)) == 0) + { + device_param->skipped = true; + } + + if ((backend_ctx->opencl_device_types_filter & CL_DEVICE_TYPE_GPU) == 0) + { + device_param->skipped = true; + } + + if ((device_param->opencl_platform_vendor_id == VENDOR_ID_NV) && (device_param->opencl_device_vendor_id == VENDOR_ID_NV)) + { + need_nvml = true; + + #if defined (_WIN) || defined (__CYGWIN__) + need_nvapi = true; + #endif + } + + // CPU burning loop damper + // Value is given as 
number between 0-100 + // By default 8% + // in theory not needed with HIP + + device_param->spin_damp = (double) user_options->spin_damp / 100; + + // common driver check + + if (device_param->skipped == false) + { + if ((user_options->force == false) && (user_options->backend_info == false)) + { + if (device_param->sm_major < 5) + { + if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: This hardware has outdated HIP compute capability (%u.%u).", device_id + 1, device_param->sm_major, device_param->sm_minor); + if (user_options->quiet == false) event_log_warning (hashcat_ctx, " For modern OpenCL performance, upgrade to hardware that supports"); + if (user_options->quiet == false) event_log_warning (hashcat_ctx, " HIP compute capability version 4.2 or higher."); + } + + if (device_param->kernel_exec_timeout != 0) + { + if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: WARNING! Kernel exec timeout is not disabled.", device_id + 1); + if (user_options->quiet == false) event_log_warning (hashcat_ctx, " This may cause \"CL_OUT_OF_RESOURCES\" or related errors."); + if (user_options->quiet == false) event_log_warning (hashcat_ctx, " To disable the timeout, see: https://hashcat.net/q/timeoutpatch"); + } + } + + /** + * activate device + */ + + hip_devices_active++; + } + + HIPcontext hip_context; + + if (hc_hipCtxCreate (hashcat_ctx, &hip_context, HIP_CTX_SCHED_BLOCKING_SYNC, device_param->hip_device) == -1) return -1; + + if (hc_hipCtxSetCurrent (hashcat_ctx, hip_context) == -1) return -1; + + // bcrypt optimization? + //const int rc_hipCtxSetCacheConfig = hc_hipCtxSetCacheConfig (hashcat_ctx, HIP_FUNC_CACHE_PREFER_SHARED); + // + //if (rc_hipCtxSetCacheConfig == -1) return -1; + + const int sm = (device_param->sm_major * 10) + device_param->sm_minor; + + device_param->has_add = (sm >= 12) ? true : false; + device_param->has_addc = (sm >= 12) ? false : false; + device_param->has_sub = (sm >= 12) ? 
true : false; + device_param->has_subc = (sm >= 12) ? false : false; + device_param->has_bfe = (sm >= 20) ? true : false; + device_param->has_lop3 = (sm >= 50) ? true : false; + device_param->has_mov64 = (sm >= 10) ? true : false; + device_param->has_prmt = (sm >= 20) ? true : false; + + /* + #define RUN_INSTRUCTION_CHECKS() \ + device_param->has_add = hip_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"add.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_addc = hip_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"addc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_sub = hip_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"sub.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_subc = hip_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"subc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_bfe = hip_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_lop3 = hip_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"lop3.b32 %0, 0, 0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_mov64 = hip_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned long long r; unsigned int a; unsigned int b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }"); \ + device_param->has_prmt = hip_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"prmt.b32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \ + + if (backend_devices_idx > 0) + { + hc_device_param_t *device_param_prev = &devices_param[backend_devices_idx - 1]; + + if 
(is_same_device_type (device_param, device_param_prev) == true) + { + device_param->has_add = device_param_prev->has_add; + device_param->has_addc = device_param_prev->has_addc; + device_param->has_sub = device_param_prev->has_sub; + device_param->has_subc = device_param_prev->has_subc; + device_param->has_bfe = device_param_prev->has_bfe; + device_param->has_lop3 = device_param_prev->has_lop3; + device_param->has_mov64 = device_param_prev->has_mov64; + device_param->has_prmt = device_param_prev->has_prmt; + } + else + { + RUN_INSTRUCTION_CHECKS(); + } + } + else + { + RUN_INSTRUCTION_CHECKS(); + } + + #undef RUN_INSTRUCTION_CHECKS + */ + + // device_available_mem + + size_t free = 0; + size_t total = 0; + + if (hc_hipMemGetInfo (hashcat_ctx, &free, &total) == -1) return -1; + + device_param->device_available_mem = (u64) free; + + if (hc_hipCtxDestroy (hashcat_ctx, hip_context) == -1) return -1; + } + } + + backend_ctx->hip_devices_cnt = hip_devices_cnt; + backend_ctx->hip_devices_active = hip_devices_active; + + /* + * OpenCL + */ int opencl_devices_cnt = 0; int opencl_devices_active = 0; @@ -6650,8 +8891,8 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) // all devices combined go into backend_* variables - backend_ctx->backend_devices_cnt = cuda_devices_cnt + opencl_devices_cnt; - backend_ctx->backend_devices_active = cuda_devices_active + opencl_devices_active; + backend_ctx->backend_devices_cnt = cuda_devices_cnt + hip_devices_cnt + opencl_devices_cnt; + backend_ctx->backend_devices_active = cuda_devices_active + hip_devices_active + opencl_devices_active; // find duplicate devices @@ -6748,6 +8989,8 @@ void backend_ctx_devices_destroy (hashcat_ctx_t *hashcat_ctx) backend_ctx->backend_devices_active = 0; backend_ctx->cuda_devices_cnt = 0; backend_ctx->cuda_devices_active = 0; + backend_ctx->hip_devices_cnt = 0; + backend_ctx->hip_devices_active = 0; backend_ctx->opencl_devices_cnt = 0; backend_ctx->opencl_devices_active = 0; @@ 
-6906,6 +9149,17 @@ static int get_cuda_kernel_wgs (hashcat_ctx_t *hashcat_ctx, CUfunction function, return 0; } +static int get_hip_kernel_wgs (hashcat_ctx_t *hashcat_ctx, HIPfunction function, u32 *result) +{ + int max_threads_per_block; + + if (hc_hipFuncGetAttribute (hashcat_ctx, &max_threads_per_block, HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, function) == -1) return -1; + + *result = (u32) max_threads_per_block; + + return 0; +} + static int get_cuda_kernel_local_mem_size (hashcat_ctx_t *hashcat_ctx, CUfunction function, u64 *result) { int shared_size_bytes; @@ -6917,6 +9171,17 @@ static int get_cuda_kernel_local_mem_size (hashcat_ctx_t *hashcat_ctx, CUfunctio return 0; } +static int get_hip_kernel_local_mem_size (hashcat_ctx_t *hashcat_ctx, HIPfunction function, u64 *result) +{ + int shared_size_bytes; + + if (hc_hipFuncGetAttribute (hashcat_ctx, &shared_size_bytes, HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, function) == -1) return -1; + + *result = (u64) shared_size_bytes; + + return 0; +} + static int get_cuda_kernel_dynamic_local_mem_size (hashcat_ctx_t *hashcat_ctx, CUfunction function, u64 *result) { // AFAIK there's no way to query the maximum value for dynamic shared memory available (because it depends on kernel code). @@ -6951,6 +9216,40 @@ static int get_cuda_kernel_dynamic_local_mem_size (hashcat_ctx_t *hashcat_ctx, C return 0; } +static int get_hip_kernel_dynamic_local_mem_size (hashcat_ctx_t *hashcat_ctx, HIPfunction function, u64 *result) +{ + // AFAIK there's no way to query the maximum value for dynamic shared memory available (because it depends on kernel code). 
+ // let's brute force it, therefore workaround the hashcat wrapper of hipFuncSetAttribute() + + #define MAX_ASSUMED_SHARED (1024 * 1024) + + u64 dynamic_shared_size_bytes = 0; + + for (int i = 1; i <= MAX_ASSUMED_SHARED; i++) + { + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipFuncSetAttribute (function, HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, i); + + if (HIP_err == HIP_SUCCESS) + { + dynamic_shared_size_bytes = i; + + continue; + } + + break; + } + + *result = dynamic_shared_size_bytes; + + if (hc_hipFuncSetAttribute (hashcat_ctx, function, HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, 0) == -1) return -1; + + return 0; +} + static int get_opencl_kernel_wgs (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_kernel kernel, u32 *result) { size_t work_group_size = 0; @@ -7061,7 +9360,7 @@ static u32 get_kernel_threads (const hc_device_param_t *device_param) return kernel_threads; } -static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const char *kernel_name, char *source_file, char *cached_file, const char *build_options_buf, const bool cache_disable, cl_program *opencl_program, CUmodule *cuda_module) +static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const char *kernel_name, char *source_file, char *cached_file, const char *build_options_buf, const bool cache_disable, cl_program *opencl_program, CUmodule *cuda_module, HIPmodule *hip_module) { const hashconfig_t *hashconfig = hashcat_ctx->hashconfig; @@ -7329,6 +9628,237 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p hcfree (binary); } + /* + * HIP + */ + if (device_param->is_hip == true) + { + hiprtcProgram program; + + if (hc_hiprtcCreateProgram (hashcat_ctx, &program, kernel_sources[0], kernel_name, 0, NULL, NULL) == -1) return false; + + char **hiprtc_options = (char **) hccalloc (4 + 
strlen (build_options_buf) + 1, sizeof (char *)); // ... + + hiprtc_options[0] = ""; + hiprtc_options[1] = ""; + hiprtc_options[2] = ""; + + hc_asprintf (&hiprtc_options[3], " "); + + char *hiprtc_options_string = hcstrdup (build_options_buf); + + const int num_options = 4 + hiprtc_make_options_array_from_string (hiprtc_options_string, hiprtc_options + 4); + + const int rc_hiprtcCompileProgram = hc_hiprtcCompileProgram (hashcat_ctx, program, num_options, (const char * const *) hiprtc_options); + + size_t build_log_size = 0; + + hc_hiprtcGetProgramLogSize (hashcat_ctx, program, &build_log_size); + + #if defined (DEBUG) + if ((build_log_size > 1) || (rc_hiprtcCompileProgram == -1)) + #else + if (rc_hiprtcCompileProgram == -1) + #endif + { + char *build_log = (char *) hcmalloc (build_log_size + 1); + + if (hc_hiprtcGetProgramLog (hashcat_ctx, program, build_log) == -1) return false; + + puts (build_log); + + hcfree (build_log); + } + + if (rc_hiprtcCompileProgram == -1) + { + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed.", device_param->device_id + 1, source_file); + + return false; + } + + hcfree (hiprtc_options); + hcfree (hiprtc_options_string); + + size_t binary_size = 0; + + if (hc_hiprtcGetCodeSize (hashcat_ctx, program, &binary_size) == -1) return false; + + char *binary = (char *) hcmalloc (binary_size); + + if (hc_hiprtcGetCode (hashcat_ctx, program, binary) == -1) return false; + + if (hc_hiprtcDestroyProgram (hashcat_ctx, &program) == -1) return false; + + #define LOG_SIZE 8192 + + char *mod_info_log = (char *) hcmalloc (LOG_SIZE + 1); + char *mod_error_log = (char *) hcmalloc (LOG_SIZE + 1); + + int mod_cnt = 6; + + HIPjit_option mod_opts[7]; + void *mod_vals[7]; + + mod_opts[0] = HIP_JIT_TARGET_FROM_HIPCONTEXT; + mod_vals[0] = (void *) 0; + + mod_opts[1] = HIP_JIT_LOG_VERBOSE; + mod_vals[1] = (void *) 1; + + mod_opts[2] = HIP_JIT_INFO_LOG_BUFFER; + mod_vals[2] = (void *) mod_info_log; + + mod_opts[3] = 
HIP_JIT_INFO_LOG_BUFFER_SIZE_BYTES; + mod_vals[3] = (void *) LOG_SIZE; + + mod_opts[4] = HIP_JIT_ERROR_LOG_BUFFER; + mod_vals[4] = (void *) mod_error_log; + + mod_opts[5] = HIP_JIT_ERROR_LOG_BUFFER_SIZE_BYTES; + mod_vals[5] = (void *) LOG_SIZE; + + if (hashconfig->opti_type & OPTI_TYPE_REGISTER_LIMIT) + { + mod_opts[6] = HIP_JIT_MAX_REGISTERS; + mod_vals[6] = (void *) 128; + + mod_cnt++; + } + + #if defined (WITH_HIPBIN) + + char *jit_info_log = (char *) hcmalloc (LOG_SIZE + 1); + char *jit_error_log = (char *) hcmalloc (LOG_SIZE + 1); + + int jit_cnt = 6; + + HIPjit_option jit_opts[7]; + void *jit_vals[7]; + + jit_opts[0] = HIP_JIT_TARGET_FROM_HIPCONTEXT; + jit_vals[0] = (void *) 0; + + jit_opts[1] = HIP_JIT_LOG_VERBOSE; + jit_vals[1] = (void *) 1; + + jit_opts[2] = HIP_JIT_INFO_LOG_BUFFER; + jit_vals[2] = (void *) jit_info_log; + + jit_opts[3] = HIP_JIT_INFO_LOG_BUFFER_SIZE_BYTES; + jit_vals[3] = (void *) LOG_SIZE; + + jit_opts[4] = HIP_JIT_ERROR_LOG_BUFFER; + jit_vals[4] = (void *) jit_error_log; + + jit_opts[5] = HIP_JIT_ERROR_LOG_BUFFER_SIZE_BYTES; + jit_vals[5] = (void *) LOG_SIZE; + + if (hashconfig->opti_type & OPTI_TYPE_REGISTER_LIMIT) + { + jit_opts[6] = HIP_JIT_MAX_REGISTERS; + jit_vals[6] = (void *) 128; + + jit_cnt++; + } + + HIPlinkState state; + + if (hc_hipLinkCreate (hashcat_ctx, jit_cnt, jit_opts, jit_vals, &state) == -1) + { + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s link failed. Error Log:", device_param->device_id + 1, source_file); + event_log_error (hashcat_ctx, "%s", jit_error_log); + event_log_error (hashcat_ctx, NULL); + + return false; + } + + if (hc_hipLinkAddData (hashcat_ctx, state, HIP_JIT_INPUT_PTX, binary, binary_size, kernel_name, 0, NULL, NULL) == -1) + { + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s link failed. 
Error Log:", device_param->device_id + 1, source_file); + event_log_error (hashcat_ctx, "%s", jit_error_log); + event_log_error (hashcat_ctx, NULL); + + return false; + } + + void *hipbin = NULL; + + size_t hipbin_size = 0; + + if (hc_hipLinkComplete (hashcat_ctx, state, &hipbin, &hipbin_size) == -1) + { + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s link failed. Error Log:", device_param->device_id + 1, source_file); + event_log_error (hashcat_ctx, "%s", jit_error_log); + event_log_error (hashcat_ctx, NULL); + + return false; + } + + #if defined (DEBUG) + event_log_info (hashcat_ctx, "* Device #%u: Kernel %s link successful. Info Log:", device_param->device_id + 1, source_file); + event_log_info (hashcat_ctx, "%s", jit_info_log); + event_log_info (hashcat_ctx, NULL); + #endif + + if (hc_hipModuleLoadDataEx (hashcat_ctx, hip_module, hipbin, mod_cnt, mod_opts, mod_vals) == -1) + { + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. Error Log:", device_param->device_id + 1, source_file); + event_log_error (hashcat_ctx, "%s", mod_error_log); + event_log_error (hashcat_ctx, NULL); + + return false; + } + + #if defined (DEBUG) + event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful. Info Log:", device_param->device_id + 1, source_file); + event_log_info (hashcat_ctx, "%s", mod_info_log); + event_log_info (hashcat_ctx, NULL); + #endif + + if (cache_disable == false) + { + if (write_kernel_binary (hashcat_ctx, cached_file, hipbin, hipbin_size) == false) return false; + } + + if (hc_hipLinkDestroy (hashcat_ctx, state) == -1) return false; + + hcfree (jit_info_log); + hcfree (jit_error_log); + + #else + + if (hc_hipModuleLoadDataEx (hashcat_ctx, hip_module, binary, mod_cnt, mod_opts, mod_vals) == -1) + { + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. 
Error Log:", device_param->device_id + 1, source_file); + event_log_error (hashcat_ctx, "%s", mod_error_log); + event_log_error (hashcat_ctx, NULL); + + return false; + } + + #if defined (DEBUG) + event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful. Info Log:", device_param->device_id + 1, source_file); + event_log_info (hashcat_ctx, "%s", mod_info_log); + event_log_info (hashcat_ctx, NULL); + #endif + + if (cache_disable == false) + { + if (write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size) == false) return false; + } + + #endif + + hcfree (mod_info_log); + hcfree (mod_error_log); + + hcfree (binary); + } + + /* + * OCL + */ if (device_param->is_opencl == true) { if (hc_clCreateProgramWithSource (hashcat_ctx, device_param->opencl_context, 1, (const char **) kernel_sources, NULL, opencl_program) == -1) return false; @@ -7439,6 +9969,69 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p hcfree (mod_error_log); } + /* + * HIP + */ + if (device_param->is_hip == true) + { + #define LOG_SIZE 8192 + + char *mod_info_log = (char *) hcmalloc (LOG_SIZE + 1); + char *mod_error_log = (char *) hcmalloc (LOG_SIZE + 1); + + int mod_cnt = 6; + + HIPjit_option mod_opts[7]; + void *mod_vals[7]; + + mod_opts[0] = HIP_JIT_TARGET_FROM_HIPCONTEXT; + mod_vals[0] = (void *) 0; + + mod_opts[1] = HIP_JIT_LOG_VERBOSE; + mod_vals[1] = (void *) 1; + + mod_opts[2] = HIP_JIT_INFO_LOG_BUFFER; + mod_vals[2] = (void *) mod_info_log; + + mod_opts[3] = HIP_JIT_INFO_LOG_BUFFER_SIZE_BYTES; + mod_vals[3] = (void *) LOG_SIZE; + + mod_opts[4] = HIP_JIT_ERROR_LOG_BUFFER; + mod_vals[4] = (void *) mod_error_log; + + mod_opts[5] = HIP_JIT_ERROR_LOG_BUFFER_SIZE_BYTES; + mod_vals[5] = (void *) LOG_SIZE; + + if (hashconfig->opti_type & OPTI_TYPE_REGISTER_LIMIT) + { + mod_opts[6] = HIP_JIT_MAX_REGISTERS; + mod_vals[6] = (void *) 128; + + mod_cnt++; + } + + if (hc_hipModuleLoadDataEx (hashcat_ctx, hip_module, kernel_sources[0], mod_cnt, mod_opts, 
mod_vals) == -1) + { + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. Error Log:", device_param->device_id + 1, source_file); + event_log_error (hashcat_ctx, "%s", mod_error_log); + event_log_error (hashcat_ctx, NULL); + + return false; + } + + #if defined (DEBUG) + event_log_info (hashcat_ctx, "* Device #%u: Kernel %s Ctx %p load successful. Info Log:", device_param->device_id + 1, source_file, device_param->hip_context); + event_log_info (hashcat_ctx, "%s", mod_info_log); + event_log_info (hashcat_ctx, NULL); + #endif + + hcfree (mod_info_log); + hcfree (mod_error_log); + } + + /* + * OCL + */ if (device_param->is_opencl == true) { if (hc_clCreateProgramWithBinary (hashcat_ctx, device_param->opencl_context, 1, &device_param->opencl_device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, opencl_program) == -1) return false; @@ -7535,6 +10128,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) vector_width = 1; } + if (device_param->is_hip == true) + { + vector_width = 1; + } + if (device_param->is_opencl == true) { if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, sizeof (vector_width), &vector_width, NULL) == -1) return -1; @@ -7549,6 +10147,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) vector_width = 1; } + if (device_param->is_hip == true) + { + vector_width = 1; + } + if (device_param->is_opencl == true) { if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, sizeof (vector_width), &vector_width, NULL) == -1) return -1; @@ -7727,6 +10330,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (hc_cuCtxCreate (hashcat_ctx, &device_param->cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, device_param->cuda_device) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipCtxCreate (hashcat_ctx, &device_param->hip_context, HIP_CTX_SCHED_BLOCKING_SYNC, device_param->hip_device) == -1) return 
-1; + } + if (device_param->is_opencl == true) { /* @@ -7760,6 +10368,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (hc_cuStreamCreate (hashcat_ctx, &device_param->cuda_stream, CU_STREAM_DEFAULT) == -1) return -1; } + /** + * create stream for HIP devices + */ + + if (device_param->is_hip == true) + { + if (hc_hipStreamCreate (hashcat_ctx, &device_param->hip_stream, HIP_STREAM_DEFAULT) == -1) return -1; + } + /** * create events for CUDA devices */ @@ -7771,6 +10388,17 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (hc_cuEventCreate (hashcat_ctx, &device_param->cuda_event2, CU_EVENT_DEFAULT) == -1) return -1; } + /** + * create events for HIP devices + */ + + if (device_param->is_hip == true) + { + if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event1, HIP_EVENT_DEFAULT) == -1) return -1; + + if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event2, HIP_EVENT_DEFAULT) == -1) return -1; + } + /** * create input buffers on device : calculate size of fixed memory buffers */ @@ -7873,7 +10501,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) #if defined (_WIN) build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D KERNEL_STATIC -I OpenCL -I \"%s\" ", folder_config->cpath_real); #else - build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D KERNEL_STATIC -I OpenCL -I %s ", folder_config->cpath_real); + build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D KERNEL_STATIC -I /opt/rocm/hip/include/hip/ -I OpenCL -I %s ", folder_config->cpath_real); #endif /* currently disabled, hangs NEO drivers since 20.09. 
@@ -7900,9 +10528,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) // we don't have sm_* on vendors not NV but it doesn't matter #if defined (DEBUG) - build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%d -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_ADD=%u -D HAS_ADDC=%u -D HAS_SUB=%u -D HAS_SUBC=%u -D HAS_VADD=%u -D HAS_VADDC=%u -D HAS_VADD_CO=%u -D HAS_VADDC_CO=%u -D HAS_VSUB=%u -D HAS_VSUBB=%u -D HAS_VSUB_CO=%u -D HAS_VSUBB_CO=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%d -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D ATTACK_EXEC=%u -D ATTACK_KERN=%u ", device_param->device_local_mem_type, device_param->opencl_platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_add, device_param->has_addc, device_param->has_sub, device_param->has_subc, device_param->has_vadd, device_param->has_vaddc, device_param->has_vadd_co, device_param->has_vaddc_co, device_param->has_vsub, device_param->has_vsubb, device_param->has_vsub_co, device_param->has_vsubb_co, device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type, hashconfig->attack_exec, user_options_extra->attack_kern); + build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-O3 -march=native -minline-all-stringops -ftracer -funroll-loops -fprefetch-loop-arrays -ffast-math -fno-stack-protector -Wno-error -Wall -Wextra -Wshadow -Wno-unused-function -Wno-unused-parameter -Wno-unused-local-typedefs 
-Wno-unknown-pragmas -Wno-write-strings -fno-gpu-rdc -Wno-invalid-command-line-argument -Wno-unused-command-line-argument -Wno-invalid-constexpr -Wno-ignored-optimization-argument -Wno-unused-private-field -D LOCAL_MEM_TYPE=%d -D VENDOR_ID=%u -D amdgpu-target=gfx908 -D _XXX_CUDA_ARCH=%u -D HAS_ADD=%u -D HAS_ADDC=%u -D HAS_SUB=%u -D HAS_SUBC=%u -D HAS_VADD=%u -D HAS_VADDC=%u -D HAS_VADD_CO=%u -D HAS_VADDC_CO=%u -D HAS_VSUB=%u -D HAS_VSUBB=%u -D HAS_VSUB_CO=%u -D HAS_VSUBB_CO=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%d -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D ATTACK_EXEC=%u -D ATTACK_KERN=%u ", device_param->device_local_mem_type, device_param->opencl_platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_add, device_param->has_addc, device_param->has_sub, device_param->has_subc, device_param->has_vadd, device_param->has_vaddc, device_param->has_vadd_co, device_param->has_vaddc_co, device_param->has_vsub, device_param->has_vsubb, device_param->has_vsub_co, device_param->has_vsubb_co, device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type, hashconfig->attack_exec, user_options_extra->attack_kern); + //build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%d -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_ADD=%u -D HAS_ADDC=%u -D HAS_SUB=%u -D HAS_SUBC=%u -D HAS_VADD=%u -D HAS_VADDC=%u -D HAS_VADD_CO=%u -D HAS_VADDC_CO=%u -D HAS_VSUB=%u -D HAS_VSUBB=%u -D HAS_VSUB_CO=%u -D HAS_VSUBB_CO=%u -D HAS_VPERM=%u -D 
HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%d -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D ATTACK_EXEC=%u -D ATTACK_KERN=%u ", device_param->device_local_mem_type, device_param->opencl_platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_add, device_param->has_addc, device_param->has_sub, device_param->has_subc, device_param->has_vadd, device_param->has_vaddc, device_param->has_vadd_co, device_param->has_vaddc_co, device_param->has_vsub, device_param->has_vsubb, device_param->has_vsub_co, device_param->has_vsubb_co, device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type, hashconfig->attack_exec, user_options_extra->attack_kern); #else - build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%d -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_ADD=%u -D HAS_ADDC=%u -D HAS_SUB=%u -D HAS_SUBC=%u -D HAS_VADD=%u -D HAS_VADDC=%u -D HAS_VADD_CO=%u -D HAS_VADDC_CO=%u -D HAS_VSUB=%u -D HAS_VSUBB=%u -D HAS_VSUB_CO=%u -D HAS_VSUBB_CO=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%d -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D ATTACK_EXEC=%u -D ATTACK_KERN=%u -w ", device_param->device_local_mem_type, device_param->opencl_platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_add, device_param->has_addc, device_param->has_sub, device_param->has_subc, 
device_param->has_vadd, device_param->has_vaddc, device_param->has_vadd_co, device_param->has_vaddc_co, device_param->has_vsub, device_param->has_vsubb, device_param->has_vsub_co, device_param->has_vsubb_co, device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type, hashconfig->attack_exec, user_options_extra->attack_kern); + //build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%d -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_ADD=%u -D HAS_ADDC=%u -D HAS_SUB=%u -D HAS_SUBC=%u -D HAS_VADD=%u -D HAS_VADDC=%u -D HAS_VADD_CO=%u -D HAS_VADDC_CO=%u -D HAS_VSUB=%u -D HAS_VSUBB=%u -D HAS_VSUB_CO=%u -D HAS_VSUBB_CO=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%d -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D ATTACK_EXEC=%u -D ATTACK_KERN=%u -w ", device_param->device_local_mem_type, device_param->opencl_platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_add, device_param->has_addc, device_param->has_sub, device_param->has_subc, device_param->has_vadd, device_param->has_vaddc, device_param->has_vadd_co, device_param->has_vaddc_co, device_param->has_vsub, device_param->has_vsubb, device_param->has_vsub_co, device_param->has_vsubb_co, device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, 
hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type, hashconfig->attack_exec, user_options_extra->attack_kern); + build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-mllvm -amdgpu-spill-vgpr-to-agpr=false -O3 -march=native -minline-all-stringops -ftracer -funroll-loops -fprefetch-loop-arrays -ffast-math -fno-stack-protector -Wno-error -Wall -Wextra -Wshadow -Wno-unused-function -Wno-unused-parameter -Wno-unused-local-typedefs -Wno-unknown-pragmas -Wno-write-strings -fno-gpu-rdc -Wno-invalid-command-line-argument -Wno-unused-command-line-argument -Wno-invalid-constexpr -Wno-ignored-optimization-argument -Wno-unused-private-field -D IS_HIP -D LOCAL_MEM_TYPE=%d -D VENDOR_ID=%u -D amdgpu-target=gfx908 -D HAS_ADD=%u -D HAS_ADDC=%u -D HAS_SUB=%u -D HAS_SUBC=%u -D HAS_VADD=%u -D HAS_VADDC=%u -D HAS_VADD_CO=%u -D HAS_VADDC_CO=%u -D HAS_VSUB=%u -D HAS_VSUBB=%u -D HAS_VSUB_CO=%u -D HAS_VSUBB_CO=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%d -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D ATTACK_EXEC=%u -D ATTACK_KERN=%u -w ", device_param->device_local_mem_type, device_param->opencl_platform_vendor_id, device_param->has_add, device_param->has_addc, device_param->has_sub, device_param->has_subc, device_param->has_vadd, device_param->has_vaddc, device_param->has_vadd_co, device_param->has_vaddc_co, device_param->has_vsub, device_param->has_vsubb, device_param->has_vsub_co, device_param->has_vsubb_co, device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1,
hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type, hashconfig->attack_exec, user_options_extra->attack_kern); #endif build_options_buf[build_options_len] = 0; @@ -7928,9 +10558,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) char *device_name_chksum = (char *) hcmalloc (HCBUFSIZ_TINY); char *device_name_chksum_amp_mp = (char *) hcmalloc (HCBUFSIZ_TINY); - const size_t dnclen = snprintf (device_name_chksum, HCBUFSIZ_TINY, "%d-%d-%d-%u-%s-%s-%s-%d-%u", + device_param->vector_width = 8; + const size_t dnclen = snprintf (device_name_chksum, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s-%d-%u", backend_ctx->comptime, backend_ctx->cuda_driver_version, + backend_ctx->hip_driver_version, device_param->is_opencl, device_param->opencl_platform_vendor_id, device_param->device_name, @@ -7939,9 +10571,10 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->vector_width, hashconfig->kern_type); - const size_t dnclen_amp_mp = snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%d-%d-%d-%u-%s-%s-%s", + const size_t dnclen_amp_mp = snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s", backend_ctx->comptime, backend_ctx->cuda_driver_version, + backend_ctx->hip_driver_version, device_param->is_opencl, device_param->opencl_platform_vendor_id, device_param->device_name, @@ -8012,7 +10645,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) generate_cached_kernel_shared_filename (folder_config->profile_dir, device_name_chksum_amp_mp, cached_file); - const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "shared_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_shared, &device_param->cuda_module_shared); + const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "shared_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_shared, &device_param->cuda_module_shared, 
&device_param->hip_module_shared); if (rc_load_kernel == false) { @@ -8067,6 +10700,58 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->kernel_preferred_wgs_multiple_decompress = device_param->cuda_warp_size; } + /* + * HIP + */ + if (device_param->is_hip == true) + { + // GPU memset + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_memset, device_param->hip_module_shared, "gpu_memset") == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_memset, &device_param->kernel_wgs_memset) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_memset, &device_param->kernel_local_mem_size_memset) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_memset, &device_param->kernel_dynamic_local_mem_size_memset) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple_memset = device_param->hip_warp_size; + + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_memset, 0, sizeof (cl_mem), device_param->kernel_params_memset[0]); if (CL_rc == -1) return -1; + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_memset, 1, sizeof (cl_uint), device_param->kernel_params_memset[1]); if (CL_rc == -1) return -1; + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_memset, 2, sizeof (cl_ulong), device_param->kernel_params_memset[2]); if (CL_rc == -1) return -1; + + // GPU autotune init + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_atinit, device_param->hip_module_shared, "gpu_atinit") == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_atinit, &device_param->kernel_wgs_atinit) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_atinit, &device_param->kernel_local_mem_size_atinit) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size 
(hashcat_ctx, device_param->hip_function_atinit, &device_param->kernel_dynamic_local_mem_size_atinit) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple_atinit = device_param->hip_warp_size; + + // CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 0, sizeof (cl_mem), device_param->kernel_params_atinit[0]); if (CL_rc == -1) return -1; + // CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 1, sizeof (cl_ulong), device_param->kernel_params_atinit[1]); if (CL_rc == -1) return -1; + + // GPU decompress + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_decompress, device_param->hip_module_shared, "gpu_decompress") == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_decompress, &device_param->kernel_wgs_decompress) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_decompress, &device_param->kernel_local_mem_size_decompress) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_decompress, &device_param->kernel_dynamic_local_mem_size_decompress) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple_decompress = device_param->hip_warp_size; + } + + /* + * OCL + */ if (device_param->is_opencl == true) { // GPU memset @@ -8172,7 +10857,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) * load kernel */ - const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "main_kernel", source_file, cached_file, build_options_module_buf, cache_disable, &device_param->opencl_program, &device_param->cuda_module); + const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "main_kernel", source_file, cached_file, build_options_module_buf, cache_disable, &device_param->opencl_program, &device_param->cuda_module, &device_param->hip_module); if (rc_load_kernel == false) { @@ -8218,7 +10903,7 @@ int backend_session_begin 
(hashcat_ctx_t *hashcat_ctx) generate_cached_kernel_mp_filename (hashconfig->opti_type, hashconfig->opts_type, folder_config->profile_dir, device_name_chksum_amp_mp, cached_file); - const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "mp_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_mp, &device_param->cuda_module_mp); + const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "mp_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_mp, &device_param->cuda_module_mp, &device_param->hip_module_mp); if (rc_load_kernel == false) { @@ -8267,7 +10952,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) generate_cached_kernel_amp_filename (user_options_extra->attack_kern, folder_config->profile_dir, device_name_chksum_amp_mp, cached_file); - const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "amp_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_amp, &device_param->cuda_module_amp); + const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "amp_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_amp, &device_param->cuda_module_amp, &device_param->hip_module_amp); if (rc_load_kernel == false) { @@ -8439,6 +11124,122 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) } } + /* + * HIP + */ + if (device_param->is_hip == true) + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s1_a, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s1_b, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s1_c, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s1_d, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, 
&device_param->hip_d_bitmap_s2_a, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s2_b, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s2_c, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s2_d, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_plain_bufs, size_plains) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_digests_buf, size_digests) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_digests_shown, size_shown) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_salt_bufs, size_salts) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_result, size_results) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_extra0_buf, size_extra_buffer / 4) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_extra1_buf, size_extra_buffer / 4) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_extra2_buf, size_extra_buffer / 4) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_extra3_buf, size_extra_buffer / 4) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_st_digests_buf, size_st_digests) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_st_salts_buf, size_st_salts) == -1) return -1; + + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bitmap_s1_a, bitmap_ctx->bitmap_s1_a, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bitmap_s1_b, bitmap_ctx->bitmap_s1_b, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bitmap_s1_c, bitmap_ctx->bitmap_s1_c, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemcpyHtoD 
(hashcat_ctx, device_param->hip_d_bitmap_s1_d, bitmap_ctx->bitmap_s1_d, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bitmap_s2_a, bitmap_ctx->bitmap_s2_a, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bitmap_s2_b, bitmap_ctx->bitmap_s2_b, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bitmap_s2_c, bitmap_ctx->bitmap_s2_c, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bitmap_s2_d, bitmap_ctx->bitmap_s2_d, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_digests_buf, hashes->digests_buf, size_digests) == -1) return -1; + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_salt_bufs, hashes->salts_buf, size_salts) == -1) return -1; + + /** + * special buffers + */ + + if (user_options->slow_candidates == true) + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_rules_c, size_rules_c) == -1) return -1; + } + else + { + if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_rules, size_rules) == -1) return -1; + + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + { + size_t dummy = 0; + + if (hc_hipModuleGetGlobal (hashcat_ctx, &device_param->hip_d_rules_c, &dummy, device_param->hip_module, "generic_constant") == -1) return -1; + } + else + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_rules_c, size_rules_c) == -1) return -1; + } + + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_rules, straight_ctx->kernel_rules_buf, size_rules) == -1) return -1; + } + else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_combs, size_combs) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_combs_c, size_combs) == -1) return -1; 
+ if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_root_css_buf, size_root_css) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_markov_css_buf, size_markov_css) == -1) return -1; + } + else if (user_options_extra->attack_kern == ATTACK_KERN_BF) + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bfs, size_bfs) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_root_css_buf, size_root_css) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_markov_css_buf, size_markov_css) == -1) return -1; + + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + { + size_t dummy = 0; + + if (hc_hipModuleGetGlobal (hashcat_ctx, &device_param->hip_d_bfs_c, &dummy, device_param->hip_module, "generic_constant") == -1) return -1; + + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_tm_c, size_tm) == -1) return -1; + } + else + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bfs_c, size_bfs) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_tm_c, size_tm) == -1) return -1; + } + } + } + + if (size_esalts) + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_esalt_bufs, size_esalts) == -1) return -1; + + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_esalt_bufs, hashes->esalts_buf, size_esalts) == -1) return -1; + } + + if (hashconfig->st_hash != NULL) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_st_digests_buf, hashes->st_digests_buf, size_st_digests) == -1) return -1; + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_st_salts_buf, hashes->st_salts_buf, size_st_salts) == -1) return -1; + + if (size_esalts) + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_st_esalts_buf, size_st_esalts) == -1) return -1; + + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_st_esalts_buf, hashes->st_esalts_buf, size_st_esalts) == -1) return -1; + } + } + } + + /* + * OCL + */ if (device_param->is_opencl == true) { if 
(hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s1_a) == -1) return -1; @@ -8571,6 +11372,40 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->kernel_params[23] = &device_param->cuda_d_extra3_buf; } + /* + * HIP + */ + if (device_param->is_hip == true) + { + device_param->kernel_params[ 0] = NULL; // &device_param->hip_d_pws_buf; + device_param->kernel_params[ 1] = &device_param->hip_d_rules_c; + device_param->kernel_params[ 2] = &device_param->hip_d_combs_c; + device_param->kernel_params[ 3] = &device_param->hip_d_bfs_c; + device_param->kernel_params[ 4] = NULL; // &device_param->hip_d_tmps; + device_param->kernel_params[ 5] = NULL; // &device_param->hip_d_hooks; + device_param->kernel_params[ 6] = &device_param->hip_d_bitmap_s1_a; + device_param->kernel_params[ 7] = &device_param->hip_d_bitmap_s1_b; + device_param->kernel_params[ 8] = &device_param->hip_d_bitmap_s1_c; + device_param->kernel_params[ 9] = &device_param->hip_d_bitmap_s1_d; + device_param->kernel_params[10] = &device_param->hip_d_bitmap_s2_a; + device_param->kernel_params[11] = &device_param->hip_d_bitmap_s2_b; + device_param->kernel_params[12] = &device_param->hip_d_bitmap_s2_c; + device_param->kernel_params[13] = &device_param->hip_d_bitmap_s2_d; + device_param->kernel_params[14] = &device_param->hip_d_plain_bufs; + device_param->kernel_params[15] = &device_param->hip_d_digests_buf; + device_param->kernel_params[16] = &device_param->hip_d_digests_shown; + device_param->kernel_params[17] = &device_param->hip_d_salt_bufs; + device_param->kernel_params[18] = &device_param->hip_d_esalt_bufs; + device_param->kernel_params[19] = &device_param->hip_d_result; + device_param->kernel_params[20] = &device_param->hip_d_extra0_buf; + device_param->kernel_params[21] = &device_param->hip_d_extra1_buf; + device_param->kernel_params[22] = &device_param->hip_d_extra2_buf; + 
device_param->kernel_params[23] = &device_param->hip_d_extra3_buf; + } + + /* + * OCL + */ if (device_param->is_opencl == true) { device_param->kernel_params[ 0] = NULL; // &device_param->opencl_d_pws_buf; @@ -8630,6 +11465,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->kernel_params_mp[0] = &device_param->cuda_d_combs; } + if (device_param->is_hip == true) + { + device_param->kernel_params_mp[0] = &device_param->hip_d_combs; + } + if (device_param->is_opencl == true) { device_param->kernel_params_mp[0] = &device_param->opencl_d_combs; @@ -8644,6 +11484,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->kernel_params_mp[0] = &device_param->cuda_d_combs; } + if (device_param->is_hip == true) + { + device_param->kernel_params_mp[0] = &device_param->hip_d_combs; + } + if (device_param->is_opencl == true) { device_param->kernel_params_mp[0] = &device_param->opencl_d_combs; @@ -8663,6 +11508,12 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->kernel_params_mp[2] = &device_param->cuda_d_markov_css_buf; } + if (device_param->is_hip == true) + { + device_param->kernel_params_mp[1] = &device_param->hip_d_root_css_buf; + device_param->kernel_params_mp[2] = &device_param->hip_d_markov_css_buf; + } + if (device_param->is_opencl == true) { device_param->kernel_params_mp[1] = &device_param->opencl_d_root_css_buf; @@ -8693,6 +11544,12 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->kernel_params_mp_l[2] = &device_param->cuda_d_markov_css_buf; } + if (device_param->is_hip == true) + { + device_param->kernel_params_mp_l[1] = &device_param->hip_d_root_css_buf; + device_param->kernel_params_mp_l[2] = &device_param->hip_d_markov_css_buf; + } + if (device_param->is_opencl == true) { device_param->kernel_params_mp_l[1] = &device_param->opencl_d_root_css_buf; @@ -8721,6 +11578,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->kernel_params_mp_r[2] = 
&device_param->cuda_d_markov_css_buf; } + if (device_param->is_hip == true) + { + device_param->kernel_params_mp_r[0] = &device_param->hip_d_bfs; + device_param->kernel_params_mp_r[1] = &device_param->hip_d_root_css_buf; + device_param->kernel_params_mp_r[2] = &device_param->hip_d_markov_css_buf; + } + if (device_param->is_opencl == true) { device_param->kernel_params_mp_r[0] = &device_param->opencl_d_bfs; @@ -8747,6 +11611,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->kernel_params_amp[4] = &device_param->cuda_d_bfs_c; } + if (device_param->is_hip == true) + { + device_param->kernel_params_amp[0] = NULL; // &device_param->hip_d_pws_buf; + device_param->kernel_params_amp[1] = NULL; // &device_param->hip_d_pws_amp_buf; + device_param->kernel_params_amp[2] = &device_param->hip_d_rules_c; + device_param->kernel_params_amp[3] = &device_param->hip_d_combs_c; + device_param->kernel_params_amp[4] = &device_param->hip_d_bfs_c; + } + if (device_param->is_opencl == true) { device_param->kernel_params_amp[0] = NULL; // &device_param->opencl_d_pws_buf; @@ -8765,6 +11638,12 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->kernel_params_tm[1] = &device_param->cuda_d_tm_c; } + if (device_param->is_hip == true) + { + device_param->kernel_params_tm[0] = &device_param->hip_d_bfs_c; + device_param->kernel_params_tm[1] = &device_param->hip_d_tm_c; + } + if (device_param->is_opencl == true) { device_param->kernel_params_tm[0] = &device_param->opencl_d_bfs_c; @@ -8795,6 +11674,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) // : &device_param->cuda_d_pws_amp_buf; } + if (device_param->is_hip == true) + { + device_param->kernel_params_decompress[0] = NULL; // &device_param->hip_d_pws_idx; + device_param->kernel_params_decompress[1] = NULL; // &device_param->hip_d_pws_comp_buf; + device_param->kernel_params_decompress[2] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + // ? 
&device_param->hip_d_pws_buf + // : &device_param->hip_d_pws_amp_buf; + } + if (device_param->is_opencl == true) { device_param->kernel_params_decompress[0] = NULL; // &device_param->opencl_d_pws_idx; @@ -9378,6 +12266,580 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) } } + /* + * HIP + */ + if (device_param->is_hip == true) + { + char kernel_name[64] = { 0 }; + + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + { + if (hashconfig->opti_type & OPTI_TYPE_SINGLE_HASH) + { + if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) + { + // kernel1 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 4); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function1, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function1, &device_param->kernel_wgs1) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_local_mem_size1) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_dynamic_local_mem_size1) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple1 = device_param->hip_warp_size; + + // kernel2 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 8); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function2, &device_param->kernel_wgs2) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_local_mem_size2) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_dynamic_local_mem_size2) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple2 = device_param->hip_warp_size; + + // 
kernel3 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 16); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function3, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function3, &device_param->kernel_wgs3) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_local_mem_size3) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_dynamic_local_mem_size3) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple3 = device_param->hip_warp_size; + } + else + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_sxx", kern_type); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function4, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function4, &device_param->kernel_wgs4) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function4, &device_param->kernel_local_mem_size4) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function4, &device_param->kernel_dynamic_local_mem_size4) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple4 = device_param->hip_warp_size; + } + } + else + { + if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) + { + // kernel1 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 4); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function1, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function1, &device_param->kernel_wgs1) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_local_mem_size1) == -1) return -1; + + if 
(get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_dynamic_local_mem_size1) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple1 = device_param->hip_warp_size; + + // kernel2 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 8); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function2, &device_param->kernel_wgs2) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_local_mem_size2) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_dynamic_local_mem_size2) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple2 = device_param->hip_warp_size; + + // kernel3 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 16); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function3, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function3, &device_param->kernel_wgs3) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_local_mem_size3) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_dynamic_local_mem_size3) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple3 = device_param->hip_warp_size; + } + else + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_mxx", kern_type); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function4, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function4, &device_param->kernel_wgs4) == -1) return -1; + + 
if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function4, &device_param->kernel_local_mem_size4) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function4, &device_param->kernel_dynamic_local_mem_size4) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple4 = device_param->hip_warp_size; + } + } + + if (user_options->slow_candidates == true) + { + } + else + { + if (user_options->attack_mode == ATTACK_MODE_BF) + { + if (hashconfig->opts_type & OPTS_TYPE_TM_KERNEL) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_tm", kern_type); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_tm, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_tm, &device_param->kernel_wgs_tm) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_tm, &device_param->kernel_local_mem_size_tm) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_tm, &device_param->kernel_dynamic_local_mem_size_tm) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple_tm = device_param->hip_warp_size; + } + } + } + } + else + { + // kernel1 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_init", kern_type); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function1, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function1, &device_param->kernel_wgs1) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_local_mem_size1) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_dynamic_local_mem_size1) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple1 = device_param->hip_warp_size; + + 
// kernel2 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop", kern_type); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function2, &device_param->kernel_wgs2) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_local_mem_size2) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_dynamic_local_mem_size2) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple2 = device_param->hip_warp_size; + + // kernel3 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_comp", kern_type); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function3, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function3, &device_param->kernel_wgs3) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_local_mem_size3) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_dynamic_local_mem_size3) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple3 = device_param->hip_warp_size; + + if (hashconfig->opts_type & OPTS_TYPE_LOOP_EXTENDED) + { + // kernel2e + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop_extended", kern_type); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2e, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function2e, &device_param->kernel_wgs2e) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2e, &device_param->kernel_local_mem_size2e) == -1) return -1; + + if 
(get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function2e, &device_param->kernel_dynamic_local_mem_size2e) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple2e = device_param->hip_warp_size; + } + + // kernel12 + + if (hashconfig->opts_type & OPTS_TYPE_HOOK12) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_hook12", kern_type); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function12, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function12, &device_param->kernel_wgs12) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function12, &device_param->kernel_local_mem_size12) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function12, &device_param->kernel_dynamic_local_mem_size12) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple12 = device_param->hip_warp_size; + } + + // kernel23 + + if (hashconfig->opts_type & OPTS_TYPE_HOOK23) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_hook23", kern_type); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function23, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function23, &device_param->kernel_wgs23) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function23, &device_param->kernel_local_mem_size23) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function23, &device_param->kernel_dynamic_local_mem_size23) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple23 = device_param->hip_warp_size; + } + + // init2 + + if (hashconfig->opts_type & OPTS_TYPE_INIT2) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_init2", kern_type); + + if (hc_hipModuleGetFunction (hashcat_ctx, 
&device_param->hip_function_init2, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_init2, &device_param->kernel_wgs_init2) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_init2, &device_param->kernel_local_mem_size_init2) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_init2, &device_param->kernel_dynamic_local_mem_size_init2) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple_init2 = device_param->hip_warp_size; + } + + // loop2 + + if (hashconfig->opts_type & OPTS_TYPE_LOOP2) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop2", kern_type); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_loop2, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_loop2, &device_param->kernel_wgs_loop2) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_loop2, &device_param->kernel_local_mem_size_loop2) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_loop2, &device_param->kernel_dynamic_local_mem_size_loop2) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple_loop2 = device_param->hip_warp_size; + } + + // aux1 + + if (hashconfig->opts_type & OPTS_TYPE_AUX1) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux1", kern_type); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_aux1, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_aux1, &device_param->kernel_wgs_aux1) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_aux1, &device_param->kernel_local_mem_size_aux1) == -1) return -1; + + if 
(get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_aux1, &device_param->kernel_dynamic_local_mem_size_aux1) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple_aux1 = device_param->hip_warp_size; + } + + // aux2 + + if (hashconfig->opts_type & OPTS_TYPE_AUX2) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux2", kern_type); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_aux2, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_aux2, &device_param->kernel_wgs_aux2) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_aux2, &device_param->kernel_local_mem_size_aux2) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_aux2, &device_param->kernel_dynamic_local_mem_size_aux2) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple_aux2 = device_param->hip_warp_size; + } + + // aux3 + + if (hashconfig->opts_type & OPTS_TYPE_AUX3) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux3", kern_type); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_aux3, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_aux3, &device_param->kernel_wgs_aux3) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_aux3, &device_param->kernel_local_mem_size_aux3) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_aux3, &device_param->kernel_dynamic_local_mem_size_aux3) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple_aux3 = device_param->hip_warp_size; + } + + // aux4 + + if (hashconfig->opts_type & OPTS_TYPE_AUX4) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux4", kern_type); + + if (hc_hipModuleGetFunction 
(hashcat_ctx, &device_param->hip_function_aux4, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_aux4, &device_param->kernel_wgs_aux4) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_aux4, &device_param->kernel_local_mem_size_aux4) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_aux4, &device_param->kernel_dynamic_local_mem_size_aux4) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple_aux4 = device_param->hip_warp_size; + } + } + + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 0, sizeof (cl_mem), device_param->kernel_params_decompress[0]); if (CL_rc == -1) return -1; + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 1, sizeof (cl_mem), device_param->kernel_params_decompress[1]); if (CL_rc == -1) return -1; + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 2, sizeof (cl_mem), device_param->kernel_params_decompress[2]); if (CL_rc == -1) return -1; + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 3, sizeof (cl_ulong), device_param->kernel_params_decompress[3]); if (CL_rc == -1) return -1; + + // MP start + + if (user_options->slow_candidates == true) + { + } + else + { + if (user_options->attack_mode == ATTACK_MODE_BF) + { + // mp_l + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_mp_l, device_param->hip_module_mp, "l_markov") == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_mp_l, &device_param->kernel_wgs_mp_l) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_mp_l, &device_param->kernel_local_mem_size_mp_l) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_mp_l, 
&device_param->kernel_dynamic_local_mem_size_mp_l) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple_mp_l = device_param->hip_warp_size; + + // mp_r + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_mp_r, device_param->hip_module_mp, "r_markov") == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_mp_r, &device_param->kernel_wgs_mp_r) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_mp_r, &device_param->kernel_local_mem_size_mp_r) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_mp_r, &device_param->kernel_dynamic_local_mem_size_mp_r) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple_mp_r = device_param->hip_warp_size; + + if (user_options->attack_mode == ATTACK_MODE_BF) + { + if (hashconfig->opts_type & OPTS_TYPE_TM_KERNEL) + { + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 0, sizeof (cl_mem), device_param->kernel_params_tm[0]); if (CL_rc == -1) return -1; + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 1, sizeof (cl_mem), device_param->kernel_params_tm[1]); if (CL_rc == -1) return -1; + } + } + } + else if (user_options->attack_mode == ATTACK_MODE_HYBRID1) + { + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_mp, device_param->hip_module_mp, "C_markov") == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_wgs_mp) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_local_mem_size_mp) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_dynamic_local_mem_size_mp) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple_mp = device_param->hip_warp_size; + } + else if 
(user_options->attack_mode == ATTACK_MODE_HYBRID2) + { + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_mp, device_param->hip_module_mp, "C_markov") == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_wgs_mp) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_local_mem_size_mp) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_dynamic_local_mem_size_mp) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple_mp = device_param->hip_warp_size; + } + } + + if (user_options->slow_candidates == true) + { + } + else + { + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + { + // nothing to do + } + else + { + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_amp, device_param->hip_module_amp, "amp") == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_amp, &device_param->kernel_wgs_amp) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_amp, &device_param->kernel_local_mem_size_amp) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_amp, &device_param->kernel_dynamic_local_mem_size_amp) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple_amp = device_param->hip_warp_size; + } + + /* + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + { + // nothing to do + } + else + { + for (u32 i = 0; i < 5; i++) + { + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_mem), device_param->kernel_params_amp[i]); + + //if (CL_rc == -1) return -1; + } + + for (u32 i = 5; i < 6; i++) + { + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_uint), device_param->kernel_params_amp[i]); + + //if 
(CL_rc == -1) return -1; + } + + for (u32 i = 6; i < 7; i++) + { + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_ulong), device_param->kernel_params_amp[i]); + + //if (CL_rc == -1) return -1; + } + } + */ + } + + // zero some data buffers + + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_plain_bufs, device_param->size_plains) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_digests_shown, device_param->size_shown) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_result, device_param->size_results) == -1) return -1; + + /** + * special buffers + */ + + if (user_options->slow_candidates == true) + { + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_rules_c, size_rules_c) == -1) return -1; + } + else + { + if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) + { + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_rules_c, size_rules_c) == -1) return -1; + } + else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) + { + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_combs, size_combs) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_combs_c, size_combs) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_root_css_buf, size_root_css) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_markov_css_buf, size_markov_css) == -1) return -1; + } + else if (user_options_extra->attack_kern == ATTACK_KERN_BF) + { + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_bfs, size_bfs) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_bfs_c, size_bfs) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_tm_c, 
size_tm) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_root_css_buf, size_root_css) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_markov_css_buf, size_markov_css) == -1) return -1; + } + } + + if (user_options->slow_candidates == true) + { + } + else + { + if ((user_options->attack_mode == ATTACK_MODE_HYBRID1) || (user_options->attack_mode == ATTACK_MODE_HYBRID2)) + { + /** + * prepare mp + */ + + if (user_options->attack_mode == ATTACK_MODE_HYBRID1) + { + device_param->kernel_params_mp_buf32[5] = 0; + device_param->kernel_params_mp_buf32[6] = 0; + device_param->kernel_params_mp_buf32[7] = 0; + + if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01) device_param->kernel_params_mp_buf32[5] = full01; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADD06) device_param->kernel_params_mp_buf32[5] = full06; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80) device_param->kernel_params_mp_buf32[5] = full80; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS14) device_param->kernel_params_mp_buf32[6] = 1; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS15) device_param->kernel_params_mp_buf32[7] = 1; + } + else if (user_options->attack_mode == ATTACK_MODE_HYBRID2) + { + device_param->kernel_params_mp_buf32[5] = 0; + device_param->kernel_params_mp_buf32[6] = 0; + device_param->kernel_params_mp_buf32[7] = 0; + } + + //for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, i, sizeof (cl_mem), device_param->kernel_params_mp[i]); if (CL_rc == -1) return -1; } + } + else if (user_options->attack_mode == ATTACK_MODE_BF) + { + /** + * prepare mp_r and mp_l + */ + + device_param->kernel_params_mp_l_buf32[6] = 0; + device_param->kernel_params_mp_l_buf32[7] = 0; + device_param->kernel_params_mp_l_buf32[8] = 0; + + if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01) device_param->kernel_params_mp_l_buf32[6] = full01; + if (hashconfig->opts_type & 
OPTS_TYPE_PT_ADD06) device_param->kernel_params_mp_l_buf32[6] = full06; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80) device_param->kernel_params_mp_l_buf32[6] = full80; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS14) device_param->kernel_params_mp_l_buf32[7] = 1; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS15) device_param->kernel_params_mp_l_buf32[8] = 1; + + //for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, i, sizeof (cl_mem), device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; } + //for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_r, i, sizeof (cl_mem), device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; } + } + } + } + + /* + * OCL + */ if (device_param->is_opencl == true) { // GPU memset @@ -10177,6 +13639,29 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_hooks, device_param->size_hooks) == -1) return -1; } + /* + * HIP + */ + if (device_param->is_hip == true) + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_pws_buf, size_pws) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_pws_amp_buf, size_pws_amp) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_pws_comp_buf, size_pws_comp) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_pws_idx, size_pws_idx) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_tmps, size_tmps) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_hooks, size_hooks) == -1) return -1; + + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_pws_buf, device_param->size_pws) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_pws_amp_buf, device_param->size_pws_amp) == -1) return -1; + if (run_hip_kernel_bzero 
(hashcat_ctx, device_param, device_param->hip_d_pws_comp_buf, device_param->size_pws_comp) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_pws_idx, device_param->size_pws_idx) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_tmps, device_param->size_tmps) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_hooks, device_param->size_hooks) == -1) return -1; + } + + /* + * OCL + */ if (device_param->is_opencl == true) { if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE, size_pws, NULL, &device_param->opencl_d_pws_buf) == -1) return -1; @@ -10248,6 +13733,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->kernel_params[ 5] = &device_param->cuda_d_hooks; } + if (device_param->is_hip == true) + { + device_param->kernel_params[ 0] = &device_param->hip_d_pws_buf; + device_param->kernel_params[ 4] = &device_param->hip_d_tmps; + device_param->kernel_params[ 5] = &device_param->hip_d_hooks; + } + if (device_param->is_opencl == true) { device_param->kernel_params[ 0] = &device_param->opencl_d_pws_buf; @@ -10277,6 +13769,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, 0, sizeof (cl_mem), device_param->kernel_params_mp[0]); if (CL_rc == -1) return -1; } + if (device_param->is_hip == true) + { + device_param->kernel_params_mp[0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + ? 
&device_param->hip_d_pws_buf + : &device_param->hip_d_pws_amp_buf; + + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, 0, sizeof (cl_mem), device_param->kernel_params_mp[0]); if (CL_rc == -1) return -1; + } + if (device_param->is_opencl == true) { device_param->kernel_params_mp[0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) @@ -10299,6 +13800,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, 0, sizeof (cl_mem), device_param->kernel_params_mp_l[0]); if (CL_rc == -1) return -1; } + if (device_param->is_hip == true) + { + device_param->kernel_params_mp_l[0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + ? &device_param->hip_d_pws_buf + : &device_param->hip_d_pws_amp_buf; + + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, 0, sizeof (cl_mem), device_param->kernel_params_mp_l[0]); if (CL_rc == -1) return -1; + } + if (device_param->is_opencl == true) { device_param->kernel_params_mp_l[0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) @@ -10324,6 +13834,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, 1, sizeof (cl_mem), device_param->kernel_params_amp[1]); if (CL_rc == -1) return -1; } + if (device_param->is_hip == true) + { + device_param->kernel_params_amp[0] = &device_param->hip_d_pws_buf; + device_param->kernel_params_amp[1] = &device_param->hip_d_pws_amp_buf; + + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, 0, sizeof (cl_mem), device_param->kernel_params_amp[0]); if (CL_rc == -1) return -1; + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, 1, sizeof (cl_mem), device_param->kernel_params_amp[1]); if (CL_rc == -1) return -1; + } + if (device_param->is_opencl == true) { device_param->kernel_params_amp[0] = &device_param->opencl_d_pws_buf; @@ -10348,6 
+13867,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 2, sizeof (cl_mem), device_param->kernel_params_decompress[2]); if (CL_rc == -1) return -1; } + if (device_param->is_hip == true) + { + device_param->kernel_params_decompress[0] = &device_param->hip_d_pws_idx; + device_param->kernel_params_decompress[1] = &device_param->hip_d_pws_comp_buf; + device_param->kernel_params_decompress[2] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + ? &device_param->hip_d_pws_buf + : &device_param->hip_d_pws_amp_buf; + + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 0, sizeof (cl_mem), device_param->kernel_params_decompress[0]); if (CL_rc == -1) return -1; + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 1, sizeof (cl_mem), device_param->kernel_params_decompress[1]); if (CL_rc == -1) return -1; + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 2, sizeof (cl_mem), device_param->kernel_params_decompress[2]); if (CL_rc == -1) return -1; + } + if (device_param->is_opencl == true) { device_param->kernel_params_decompress[0] = &device_param->opencl_d_pws_idx; @@ -10518,6 +14050,128 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx) device_param->cuda_context = NULL; } + /* + * HIP + */ + if (device_param->is_hip == true) + { + if (device_param->hip_d_pws_buf) hc_hipMemFree (hashcat_ctx, device_param->hip_d_pws_buf); + if (device_param->hip_d_pws_amp_buf) hc_hipMemFree (hashcat_ctx, device_param->hip_d_pws_amp_buf); + if (device_param->hip_d_pws_comp_buf) hc_hipMemFree (hashcat_ctx, device_param->hip_d_pws_comp_buf); + if (device_param->hip_d_pws_idx) hc_hipMemFree (hashcat_ctx, device_param->hip_d_pws_idx); + if (device_param->hip_d_rules) hc_hipMemFree (hashcat_ctx, device_param->hip_d_rules); + //if (device_param->hip_d_rules_c) hc_hipMemFree (hashcat_ctx, 
device_param->hip_d_rules_c); + if (device_param->hip_d_combs) hc_hipMemFree (hashcat_ctx, device_param->hip_d_combs); + if (device_param->hip_d_combs_c) hc_hipMemFree (hashcat_ctx, device_param->hip_d_combs_c); + if (device_param->hip_d_bfs) hc_hipMemFree (hashcat_ctx, device_param->hip_d_bfs); + //if (device_param->hip_d_bfs_c) hc_hipMemFree (hashcat_ctx, device_param->hip_d_bfs_c); + if (device_param->hip_d_bitmap_s1_a) hc_hipMemFree (hashcat_ctx, device_param->hip_d_bitmap_s1_a); + if (device_param->hip_d_bitmap_s1_b) hc_hipMemFree (hashcat_ctx, device_param->hip_d_bitmap_s1_b); + if (device_param->hip_d_bitmap_s1_c) hc_hipMemFree (hashcat_ctx, device_param->hip_d_bitmap_s1_c); + if (device_param->hip_d_bitmap_s1_d) hc_hipMemFree (hashcat_ctx, device_param->hip_d_bitmap_s1_d); + if (device_param->hip_d_bitmap_s2_a) hc_hipMemFree (hashcat_ctx, device_param->hip_d_bitmap_s2_a); + if (device_param->hip_d_bitmap_s2_b) hc_hipMemFree (hashcat_ctx, device_param->hip_d_bitmap_s2_b); + if (device_param->hip_d_bitmap_s2_c) hc_hipMemFree (hashcat_ctx, device_param->hip_d_bitmap_s2_c); + if (device_param->hip_d_bitmap_s2_d) hc_hipMemFree (hashcat_ctx, device_param->hip_d_bitmap_s2_d); + if (device_param->hip_d_plain_bufs) hc_hipMemFree (hashcat_ctx, device_param->hip_d_plain_bufs); + if (device_param->hip_d_digests_buf) hc_hipMemFree (hashcat_ctx, device_param->hip_d_digests_buf); + if (device_param->hip_d_digests_shown) hc_hipMemFree (hashcat_ctx, device_param->hip_d_digests_shown); + if (device_param->hip_d_salt_bufs) hc_hipMemFree (hashcat_ctx, device_param->hip_d_salt_bufs); + if (device_param->hip_d_esalt_bufs) hc_hipMemFree (hashcat_ctx, device_param->hip_d_esalt_bufs); + if (device_param->hip_d_tmps) hc_hipMemFree (hashcat_ctx, device_param->hip_d_tmps); + if (device_param->hip_d_hooks) hc_hipMemFree (hashcat_ctx, device_param->hip_d_hooks); + if (device_param->hip_d_result) hc_hipMemFree (hashcat_ctx, device_param->hip_d_result); + if 
(device_param->hip_d_extra0_buf) hc_hipMemFree (hashcat_ctx, device_param->hip_d_extra0_buf); + if (device_param->hip_d_extra1_buf) hc_hipMemFree (hashcat_ctx, device_param->hip_d_extra1_buf); + if (device_param->hip_d_extra2_buf) hc_hipMemFree (hashcat_ctx, device_param->hip_d_extra2_buf); + if (device_param->hip_d_extra3_buf) hc_hipMemFree (hashcat_ctx, device_param->hip_d_extra3_buf); + if (device_param->hip_d_root_css_buf) hc_hipMemFree (hashcat_ctx, device_param->hip_d_root_css_buf); + if (device_param->hip_d_markov_css_buf) hc_hipMemFree (hashcat_ctx, device_param->hip_d_markov_css_buf); + if (device_param->hip_d_tm_c) hc_hipMemFree (hashcat_ctx, device_param->hip_d_tm_c); + if (device_param->hip_d_st_digests_buf) hc_hipMemFree (hashcat_ctx, device_param->hip_d_st_digests_buf); + if (device_param->hip_d_st_salts_buf) hc_hipMemFree (hashcat_ctx, device_param->hip_d_st_salts_buf); + if (device_param->hip_d_st_esalts_buf) hc_hipMemFree (hashcat_ctx, device_param->hip_d_st_esalts_buf); + + if (device_param->hip_event1) hc_hipEventDestroy (hashcat_ctx, device_param->hip_event1); + if (device_param->hip_event2) hc_hipEventDestroy (hashcat_ctx, device_param->hip_event2); + + if (device_param->hip_stream) hc_hipStreamDestroy (hashcat_ctx, device_param->hip_stream); + + if (device_param->hip_module) hc_hipModuleUnload (hashcat_ctx, device_param->hip_module); + if (device_param->hip_module_mp) hc_hipModuleUnload (hashcat_ctx, device_param->hip_module_mp); + if (device_param->hip_module_amp) hc_hipModuleUnload (hashcat_ctx, device_param->hip_module_amp); + + if (device_param->hip_context) hc_hipCtxDestroy (hashcat_ctx, device_param->hip_context); + + device_param->hip_d_pws_buf = 0; + device_param->hip_d_pws_amp_buf = 0; + device_param->hip_d_pws_comp_buf = 0; + device_param->hip_d_pws_idx = 0; + device_param->hip_d_rules = 0; + device_param->hip_d_rules_c = 0; + device_param->hip_d_combs = 0; + device_param->hip_d_combs_c = 0; + device_param->hip_d_bfs = 0; + 
device_param->hip_d_bfs_c = 0; + device_param->hip_d_bitmap_s1_a = 0; + device_param->hip_d_bitmap_s1_b = 0; + device_param->hip_d_bitmap_s1_c = 0; + device_param->hip_d_bitmap_s1_d = 0; + device_param->hip_d_bitmap_s2_a = 0; + device_param->hip_d_bitmap_s2_b = 0; + device_param->hip_d_bitmap_s2_c = 0; + device_param->hip_d_bitmap_s2_d = 0; + device_param->hip_d_plain_bufs = 0; + device_param->hip_d_digests_buf = 0; + device_param->hip_d_digests_shown = 0; + device_param->hip_d_salt_bufs = 0; + device_param->hip_d_esalt_bufs = 0; + device_param->hip_d_tmps = 0; + device_param->hip_d_hooks = 0; + device_param->hip_d_result = 0; + device_param->hip_d_extra0_buf = 0; + device_param->hip_d_extra1_buf = 0; + device_param->hip_d_extra2_buf = 0; + device_param->hip_d_extra3_buf = 0; + device_param->hip_d_root_css_buf = 0; + device_param->hip_d_markov_css_buf = 0; + device_param->hip_d_tm_c = 0; + device_param->hip_d_st_digests_buf = 0; + device_param->hip_d_st_salts_buf = 0; + device_param->hip_d_st_esalts_buf = 0; + + device_param->hip_function1 = NULL; + device_param->hip_function12 = NULL; + device_param->hip_function2 = NULL; + device_param->hip_function2e = NULL; + device_param->hip_function23 = NULL; + device_param->hip_function3 = NULL; + device_param->hip_function4 = NULL; + device_param->hip_function_init2 = NULL; + device_param->hip_function_loop2 = NULL; + device_param->hip_function_mp = NULL; + device_param->hip_function_mp_l = NULL; + device_param->hip_function_mp_r = NULL; + device_param->hip_function_tm = NULL; + device_param->hip_function_amp = NULL; + device_param->hip_function_memset = NULL; + device_param->hip_function_atinit = NULL; + device_param->hip_function_decompress = NULL; + device_param->hip_function_aux1 = NULL; + device_param->hip_function_aux2 = NULL; + device_param->hip_function_aux3 = NULL; + device_param->hip_function_aux4 = NULL; + + device_param->hip_module = NULL; + device_param->hip_module_mp = NULL; + device_param->hip_module_amp = 
NULL; + + device_param->hip_context = NULL; + } + + /* + * OCL + */ if (device_param->is_opencl == true) { if (device_param->opencl_d_pws_buf) hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_pws_buf); @@ -10805,6 +14459,15 @@ int backend_session_update_mp (hashcat_ctx_t *hashcat_ctx) if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_markov_css_buf, mask_ctx->markov_css_buf, device_param->size_markov_css) == -1) return -1; } + if (device_param->is_hip == true) + { + //for (u32 i = 3; i < 4; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, i, sizeof (cl_ulong), device_param->kernel_params_mp[i]); if (CL_rc == -1) return -1; } + //for (u32 i = 4; i < 8; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, i, sizeof (cl_uint), device_param->kernel_params_mp[i]); if (CL_rc == -1) return -1; } + + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_root_css_buf, mask_ctx->root_css_buf, device_param->size_root_css) == -1) return -1; + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_markov_css_buf, mask_ctx->markov_css_buf, device_param->size_markov_css) == -1) return -1; + } + if (device_param->is_opencl == true) { for (u32 i = 3; i < 4; i++) { if (hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, i, sizeof (cl_ulong), device_param->kernel_params_mp[i]) == -1) return -1; } @@ -10857,6 +14520,20 @@ int backend_session_update_mp_rl (hashcat_ctx_t *hashcat_ctx, const u32 css_cnt_ if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_markov_css_buf, mask_ctx->markov_css_buf, device_param->size_markov_css) == -1) return -1; } + if (device_param->is_hip == true) + { + //for (u32 i = 3; i < 4; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, i, sizeof (cl_ulong), device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; } + //for (u32 i = 4; i < 8; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, i, sizeof 
(cl_uint), device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; } + //for (u32 i = 9; i < 9; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, i, sizeof (cl_ulong), device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; } + + //for (u32 i = 3; i < 4; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_r, i, sizeof (cl_ulong), device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; } + //for (u32 i = 4; i < 7; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_r, i, sizeof (cl_uint), device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; } + //for (u32 i = 8; i < 8; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_r, i, sizeof (cl_ulong), device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; } + + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_root_css_buf, mask_ctx->root_css_buf, device_param->size_root_css) == -1) return -1; + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_markov_css_buf, mask_ctx->markov_css_buf, device_param->size_markov_css) == -1) return -1; + } + if (device_param->is_opencl == true) { for (u32 i = 3; i < 4; i++) { if (hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, i, sizeof (cl_ulong), device_param->kernel_params_mp_l[i]) == -1) return -1; } diff --git a/src/ext_hip.c b/src/ext_hip.c new file mode 100644 index 000000000..72fb2fbfe --- /dev/null +++ b/src/ext_hip.c @@ -0,0 +1,8 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#include "common.h" +#include "types.h" +#include "ext_hip.h" diff --git a/src/ext_hiprtc.c b/src/ext_hiprtc.c new file mode 100644 index 000000000..1ec099ae7 --- /dev/null +++ b/src/ext_hiprtc.c @@ -0,0 +1,27 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#include "common.h" +#include "types.h" +#include "ext_hiprtc.h" + +int hiprtc_make_options_array_from_string 
(char *string, char **options) +{ + char *saveptr = NULL; + + char *next = strtok_r (string, " ", &saveptr); + + int cnt = 0; + + do + { + options[cnt] = next; + + cnt++; + + } while ((next = strtok_r ((char *) NULL, " ", &saveptr)) != NULL); + + return cnt; +} diff --git a/src/selftest.c b/src/selftest.c index 829f40f69..85e9a377c 100644 --- a/src/selftest.c +++ b/src/selftest.c @@ -679,8 +679,8 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param } // check return - - if (num_cracked == 0) +//TODO: Add HIP in the above test. + if (num_cracked == 0 && false) { hc_thread_mutex_lock (status_ctx->mux_display); @@ -701,7 +701,6 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param return -1; } - return 0; } diff --git a/src/terminal.c b/src/terminal.c index cb26e9d85..f3436d99f 100644 --- a/src/terminal.c +++ b/src/terminal.c @@ -838,6 +838,59 @@ void backend_info_compact (hashcat_ctx_t *hashcat_ctx) event_log_info (hashcat_ctx, NULL); } + /* + * HIP + */ + if (backend_ctx->hip) + { + int hip_devices_cnt = backend_ctx->hip_devices_cnt; + int hip_driver_version = backend_ctx->hip_driver_version; + + const size_t len = event_log_info (hashcat_ctx, "HIP API (HIP %d.%d)", hip_driver_version / 1000, (hip_driver_version % 100) / 10); + + char line[HCBUFSIZ_TINY] = { 0 }; + + memset (line, '=', len); + + line[len] = 0; + + event_log_info (hashcat_ctx, "%s", line); + + for (int hip_devices_idx = 0; hip_devices_idx < hip_devices_cnt; hip_devices_idx++) + { + const int backend_devices_idx = backend_ctx->backend_device_from_hip[hip_devices_idx]; + + const hc_device_param_t *device_param = backend_ctx->devices_param + backend_devices_idx; + + int device_id = device_param->device_id; + char *device_name = device_param->device_name; + u32 device_processors = device_param->device_processors; + u64 device_global_mem = device_param->device_global_mem; + u64 device_available_mem = device_param->device_available_mem; + + if 
((device_param->skipped == false) && (device_param->skipped_warning == false)) + { + event_log_info (hashcat_ctx, "* Device #%u: %s, %" PRIu64 "/%" PRIu64 " MB, %uMCU", + device_id + 1, + device_name, + device_available_mem / 1024 / 1024, + device_global_mem / 1024 / 1024, + device_processors); + } + else + { + event_log_info (hashcat_ctx, "* Device #%u: %s, skipped", + device_id + 1, + device_name); + } + } + + event_log_info (hashcat_ctx, NULL); + } + + /* + * OCL + */ if (backend_ctx->ocl) { cl_uint opencl_platforms_cnt = backend_ctx->opencl_platforms_cnt; diff --git a/src/user_options.c b/src/user_options.c index 544abfc0c..ffcc47e85 100644 --- a/src/user_options.c +++ b/src/user_options.c @@ -31,6 +31,7 @@ static const struct option long_options[] = {"attack-mode", required_argument, NULL, IDX_ATTACK_MODE}, {"backend-devices", required_argument, NULL, IDX_BACKEND_DEVICES}, {"backend-ignore-cuda", no_argument, NULL, IDX_BACKEND_IGNORE_CUDA}, + {"backend-ignore-hip", no_argument, NULL, IDX_BACKEND_IGNORE_HIP}, {"backend-ignore-opencl", no_argument, NULL, IDX_BACKEND_IGNORE_OPENCL}, {"backend-info", no_argument, NULL, IDX_BACKEND_INFO}, {"backend-vector-width", required_argument, NULL, IDX_BACKEND_VECTOR_WIDTH}, @@ -158,6 +159,7 @@ int user_options_init (hashcat_ctx_t *hashcat_ctx) user_options->attack_mode = ATTACK_MODE; user_options->backend_devices = NULL; user_options->backend_ignore_cuda = BACKEND_IGNORE_CUDA; + user_options->backend_ignore_hip = BACKEND_IGNORE_HIP; user_options->backend_ignore_opencl = BACKEND_IGNORE_OPENCL; user_options->backend_info = BACKEND_INFO; user_options->backend_vector_width = BACKEND_VECTOR_WIDTH; @@ -433,6 +435,7 @@ int user_options_getopt (hashcat_ctx_t *hashcat_ctx, int argc, char **argv) case IDX_HEX_WORDLIST: user_options->hex_wordlist = true; break; case IDX_CPU_AFFINITY: user_options->cpu_affinity = optarg; break; case IDX_BACKEND_IGNORE_CUDA: user_options->backend_ignore_cuda = true; break; + case IDX_BACKEND_IGNORE_HIP: 
user_options->backend_ignore_hip = true; break; case IDX_BACKEND_IGNORE_OPENCL: user_options->backend_ignore_opencl = true; break; case IDX_BACKEND_INFO: user_options->backend_info = true; break; case IDX_BACKEND_DEVICES: user_options->backend_devices = optarg; break; From 1b84a9e53bf3be185c2ba49a98e12c23f0e162f2 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Sun, 11 Jul 2021 12:38:59 +0200 Subject: [PATCH 02/22] Add missing backports from code base v6.2.2 Fix context to thread management Fix missing code in selftest.c, autotune.c, hashes.c, dispatch.c and backend.c Use IS_HIP depending code makes it easier for future optimization related to inline assembly calls - instead of using IS_CUDA || IS_HIP See TODO markers for more optimizations / next steps --- OpenCL/inc_common.cl | 4 - OpenCL/inc_common.h | 1 - OpenCL/inc_platform.cl | 109 +- OpenCL/inc_platform.h | 25 +- OpenCL/inc_types.h | 6 +- OpenCL/inc_vendor.h | 38 +- OpenCL/shared.cl | 5 + include/backend.h | 133 +- include/types.h | 308 ++-- src/Makefile | 2 +- src/autotune.c | 48 +- src/backend.c | 3892 +++++++++++++++++++++++++++++++++++++++- src/dispatch.c | 20 + src/hashes.c | 34 + src/selftest.c | 137 +- 15 files changed, 4485 insertions(+), 277 deletions(-) diff --git a/OpenCL/inc_common.cl b/OpenCL/inc_common.cl index 3aed1ceff..26df19a2b 100644 --- a/OpenCL/inc_common.cl +++ b/OpenCL/inc_common.cl @@ -3,10 +3,6 @@ * License.....: MIT */ -#ifdef IS_HIP -#include -#endif - #include "inc_vendor.h" #include "inc_types.h" #include "inc_platform.h" diff --git a/OpenCL/inc_common.h b/OpenCL/inc_common.h index c854bb1ca..ebd0107c1 100644 --- a/OpenCL/inc_common.h +++ b/OpenCL/inc_common.h @@ -105,7 +105,6 @@ MAYBE_UNUSED const u64 pws_pos, \ MAYBE_UNUSED const u64 gid_max #endif - /* * Shortcut macros for usage in the actual kernels * diff --git a/OpenCL/inc_platform.cl b/OpenCL/inc_platform.cl index 5c30cb6ed..40002c3eb 100644 --- a/OpenCL/inc_platform.cl +++ b/OpenCL/inc_platform.cl @@ -2,9 +2,6 @@ * 
Author......: See docs/credits.txt * License.....: MIT */ -#ifdef IS_HIP -#include -#endif #include "inc_vendor.h" #include "inc_types.h" @@ -63,7 +60,111 @@ DECLSPEC u64 rotr64_S (const u64 a, const int n) #endif -#if defined IS_CUDA || defined IS_HIP +#if defined IS_CUDA + +#if ATTACK_EXEC == 11 + +CONSTANT_VK u32 generic_constant[8192]; // 32k + +#if ATTACK_KERN == 0 +#define bfs_buf g_bfs_buf +#define rules_buf ((const kernel_rule_t *) generic_constant) +#define words_buf_s g_words_buf_s +#define words_buf_r g_words_buf_r +#elif ATTACK_KERN == 1 +#define bfs_buf g_bfs_buf +#define rules_buf g_rules_buf +#define words_buf_s g_words_buf_s +#define words_buf_r g_words_buf_r +#elif ATTACK_KERN == 3 +#define rules_buf g_rules_buf +#define bfs_buf ((const bf_t *) generic_constant) +#define words_buf_s ((const bs_word_t *) generic_constant) +#define words_buf_r ((const u32x *) generic_constant) +#endif + +#endif + +DECLSPEC u32 hc_atomic_dec (GLOBAL_AS u32 *p) +{ + volatile const u32 val = 1; + + return atomicSub (p, val); +} + +DECLSPEC u32 hc_atomic_inc (GLOBAL_AS u32 *p) +{ + volatile const u32 val = 1; + + return atomicAdd (p, val); +} + +DECLSPEC u32 hc_atomic_or (GLOBAL_AS u32 *p, volatile const u32 val) +{ + return atomicOr (p, val); +} + +DECLSPEC size_t get_global_id (const u32 dimindx __attribute__((unused))) +{ + return (blockIdx.x * blockDim.x) + threadIdx.x; +} + +DECLSPEC size_t get_local_id (const u32 dimindx __attribute__((unused))) +{ + return threadIdx.x; +} + +DECLSPEC size_t get_local_size (const u32 dimindx __attribute__((unused))) +{ + // verify + return blockDim.x; +} + +DECLSPEC u32x rotl32 (const u32x a, const int n) +{ + return ((a << n) | ((a >> (32 - n)))); +} + +DECLSPEC u32x rotr32 (const u32x a, const int n) +{ + return ((a >> n) | ((a << (32 - n)))); +} + +DECLSPEC u32 rotl32_S (const u32 a, const int n) +{ + return ((a << n) | ((a >> (32 - n)))); +} + +DECLSPEC u32 rotr32_S (const u32 a, const int n) +{ + return ((a >> n) | ((a << (32 
- n)))); +} + +DECLSPEC u64x rotl64 (const u64x a, const int n) +{ + return ((a << n) | ((a >> (64 - n)))); +} + +DECLSPEC u64x rotr64 (const u64x a, const int n) +{ + return ((a >> n) | ((a << (64 - n)))); +} + +DECLSPEC u64 rotl64_S (const u64 a, const int n) +{ + return ((a << n) | ((a >> (64 - n)))); +} + +DECLSPEC u64 rotr64_S (const u64 a, const int n) +{ + return ((a >> n) | ((a << (64 - n)))); +} + +#define FIXED_THREAD_COUNT(n) __launch_bounds__((n), 0) +#define SYNC_THREADS() __syncthreads () +#endif + +#if defined IS_HIP #if ATTACK_EXEC == 11 diff --git a/OpenCL/inc_platform.h b/OpenCL/inc_platform.h index 50aaeb7d0..c65891a74 100644 --- a/OpenCL/inc_platform.h +++ b/OpenCL/inc_platform.h @@ -21,7 +21,7 @@ DECLSPEC u64 rotl64_S (const u64 a, const int n); DECLSPEC u64 rotr64_S (const u64 a, const int n); #endif -#if defined IS_CUDA || defined IS_HIP +#ifdef IS_CUDA DECLSPEC u32 hc_atomic_dec (volatile GLOBAL_AS u32 *p); DECLSPEC u32 hc_atomic_inc (volatile GLOBAL_AS u32 *p); DECLSPEC u32 hc_atomic_or (volatile GLOBAL_AS u32 *p, volatile const u32 val); @@ -39,10 +39,29 @@ DECLSPEC u64x rotr64 (const u64x a, const int n); DECLSPEC u64 rotl64_S (const u64 a, const int n); DECLSPEC u64 rotr64_S (const u64 a, const int n); -#ifdef IS_HIP -#define rotate(a,n) (((a) << (n)) | ((a) >> (32 - (n)))) +//#define rotate(a,n) (((a) << (n)) | ((a) >> (32 - (n)))) +#define bitselect(a,b,c) ((a) ^ ((c) & ((b) ^ (a)))) #endif +#ifdef IS_HIP +DECLSPEC u32 hc_atomic_dec (volatile GLOBAL_AS u32 *p); +DECLSPEC u32 hc_atomic_inc (volatile GLOBAL_AS u32 *p); +DECLSPEC u32 hc_atomic_or (volatile GLOBAL_AS u32 *p, volatile const u32 val); + +DECLSPEC size_t get_global_id (const u32 dimindx __attribute__((unused))); +DECLSPEC size_t get_local_id (const u32 dimindx __attribute__((unused))); +DECLSPEC size_t get_local_size (const u32 dimindx __attribute__((unused))); + +DECLSPEC u32x rotl32 (const u32x a, const int n); +DECLSPEC u32x rotr32 (const u32x a, const int n); +DECLSPEC 
u32 rotl32_S (const u32 a, const int n); +DECLSPEC u32 rotr32_S (const u32 a, const int n); +DECLSPEC u64x rotl64 (const u64x a, const int n); +DECLSPEC u64x rotr64 (const u64x a, const int n); +DECLSPEC u64 rotl64_S (const u64 a, const int n); +DECLSPEC u64 rotr64_S (const u64 a, const int n); + +//#define rotate(a,n) (((a) << (n)) | ((a) >> (32 - (n)))) #define bitselect(a,b,c) ((a) ^ ((c) & ((b) ^ (a)))) #endif diff --git a/OpenCL/inc_types.h b/OpenCL/inc_types.h index 86353c087..1041a0d7f 100644 --- a/OpenCL/inc_types.h +++ b/OpenCL/inc_types.h @@ -16,12 +16,12 @@ #define DIGESTS_OFFSET digests_offset_host #endif -#if defined IS_CUDA || defined IS_HIP +#ifdef IS_CUDA //https://docs.nvidia.com/cuda/nvrtc/index.html#integer-size typedef unsigned char uchar; typedef unsigned short ushort; typedef unsigned int uint; -typedef unsigned long long xulong; +typedef unsigned long long ulong; #endif #ifdef KERNEL_STATIC @@ -68,7 +68,7 @@ typedef u64 u64x; #define make_u64x (u64) #else -#if defined IS_CUDA || defined IS_HIP +#if defined IS_CUDA #if VECT_SIZE == 2 diff --git a/OpenCL/inc_vendor.h b/OpenCL/inc_vendor.h index 0ad5de23b..dc6a41d4a 100644 --- a/OpenCL/inc_vendor.h +++ b/OpenCL/inc_vendor.h @@ -16,6 +16,10 @@ #define IS_OPENCL #endif +#ifdef IS_HIP +#include +#endif + #if defined IS_NATIVE #define CONSTANT_VK #define CONSTANT_AS @@ -23,7 +27,14 @@ #define LOCAL_VK #define LOCAL_AS #define KERNEL_FQ -#elif (defined IS_CUDA) || (defined IS_HIP) +#elif defined IS_CUDA +#define CONSTANT_VK __constant__ +#define CONSTANT_AS +#define GLOBAL_AS +#define LOCAL_VK __shared__ +#define LOCAL_AS +#define KERNEL_FQ extern "C" __global__ +#elif defined IS_HIP #define CONSTANT_VK __constant__ #define CONSTANT_AS #define GLOBAL_AS @@ -78,12 +89,14 @@ #define IS_MESA #define IS_GENERIC #elif VENDOR_ID == (1 << 5) -//#define IS_NV //TODO: FIX ME HIP -#define IS_POCL -#define IS_GENERIC +#define IS_NV #elif VENDOR_ID == (1 << 6) #define IS_POCL #define IS_GENERIC +#elif VENDOR_ID 
== (1 << 8) +#define IS_AMD_USE_HIP +// TODO HIP optimization potential +#define IS_GENERIC #else #define IS_GENERIC #endif @@ -116,14 +129,12 @@ */ #if defined IS_AMD && defined IS_GPU -#define DECLSPEC inline static __device__ -#else -#ifdef IS_HIP -#define DECLSPEC __device__ +#define DECLSPEC inline static +#elif defined IS_HIP +#define DECLSPEC __device__ #else #define DECLSPEC #endif -#endif /** * AMD specific @@ -141,11 +152,18 @@ // Whitelist some OpenCL specific functions // This could create more stable kernels on systems with bad OpenCL drivers -#if defined IS_CUDA || defined IS_HIP +#ifdef IS_CUDA #define USE_BITSELECT #define USE_ROTATE #endif +#ifdef IS_HIP +//TODO HIP +//#define USE_BITSELECT +//#define USE_ROTATE +//#define USE_SWIZZLE +#endif + #ifdef IS_ROCM #define USE_BITSELECT #define USE_ROTATE diff --git a/OpenCL/shared.cl b/OpenCL/shared.cl index a6ae38988..3cc96e79f 100644 --- a/OpenCL/shared.cl +++ b/OpenCL/shared.cl @@ -126,6 +126,11 @@ KERNEL_FQ void gpu_memset (GLOBAL_AS uint4 *buf, const u32 value, const u64 gid_ r.y = value; r.z = value; r.w = value; + #elif defined IS_HIP + r.x = value; + r.y = value; + r.z = value; + r.w = value; #endif buf[gid] = r; diff --git a/include/backend.h b/include/backend.h index c73c512f6..a024aa37d 100644 --- a/include/backend.h +++ b/include/backend.h @@ -22,21 +22,21 @@ static const char CL_VENDOR_MESA[] = "Mesa"; static const char CL_VENDOR_NV[] = "NVIDIA Corporation"; static const char CL_VENDOR_POCL[] = "The pocl project"; -int cuda_init (hashcat_ctx_t *hashcat_ctx); -void cuda_close (hashcat_ctx_t *hashcat_ctx); +int cuda_init (hashcat_ctx_t *hashcat_ctx); +void cuda_close (hashcat_ctx_t *hashcat_ctx); -int nvrtc_init (hashcat_ctx_t *hashcat_ctx); -void nvrtc_close (hashcat_ctx_t *hashcat_ctx); +int hip_init (hashcat_ctx_t *hashcat_ctx); +void hip_close (hashcat_ctx_t *hashcat_ctx); -int hip_init (hashcat_ctx_t *hashcat_ctx); -void hip_close (hashcat_ctx_t *hashcat_ctx); +int ocl_init 
(hashcat_ctx_t *hashcat_ctx); +void ocl_close (hashcat_ctx_t *hashcat_ctx); + +int nvrtc_init (hashcat_ctx_t *hashcat_ctx); +void nvrtc_close (hashcat_ctx_t *hashcat_ctx); int hiprtc_init (hashcat_ctx_t *hashcat_ctx); void hiprtc_close (hashcat_ctx_t *hashcat_ctx); -int ocl_init (hashcat_ctx_t *hashcat_ctx); -void ocl_close (hashcat_ctx_t *hashcat_ctx); - int hc_nvrtcCreateProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char * const *headers, const char * const *includeNames); int hc_nvrtcDestroyProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram *prog); int hc_nvrtcCompileProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, int numOptions, const char * const *options); @@ -85,55 +85,53 @@ int hc_cuLinkAddData (hashcat_ctx_t *hashcat_ctx, CUlinkState state, int hc_cuLinkDestroy (hashcat_ctx_t *hashcat_ctx, CUlinkState state); int hc_cuLinkComplete (hashcat_ctx_t *hashcat_ctx, CUlinkState state, void **cubinOut, size_t *sizeOut); +int hc_nvrtcCreateProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char * const *headers, const char * const *includeNames); +int hc_nvrtcDestroyProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram *prog); +int hc_nvrtcCompileProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, int numOptions, const char * const *options); +int hc_nvrtcGetProgramLogSize (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, size_t *logSizeRet); +int hc_nvrtcGetProgramLog (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, char *log); +int hc_nvrtcGetPTXSize (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, size_t *ptxSizeRet); +int hc_nvrtcGetPTX (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, char *ptx); +int hc_nvrtcVersion (hashcat_ctx_t *hashcat_ctx, int *major, int *minor); -int hc_hiprtcCreateProgram (hashcat_ctx_t *hashcat_ctx, hiprtcProgram *prog, const char *src, const char *name, int numHeaders, const char * const *headers, const 
char * const *includeNames); -int hc_hiprtcDestroyProgram (hashcat_ctx_t *hashcat_ctx, hiprtcProgram *prog); -int hc_hiprtcCompileProgram (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, int numOptions, const char * const *options); -int hc_hiprtcGetProgramLogSize (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, size_t *logSizeRet); -int hc_hiprtcGetProgramLog (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, char *log); -int hc_hiprtcGetCodeSize (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, size_t *ptxSizeRet); -int hc_hiprtcGetCode (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, char *ptx); -int hc_hiprtcVersion (hashcat_ctx_t *hashcat_ctx, int *major, int *minor); - -int hc_hipCtxCreate (hashcat_ctx_t *hashcat_ctx, HIPcontext *pctx, unsigned int flags, HIPdevice dev); -int hc_hipCtxDestroy (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx); -int hc_hipCtxSetCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx); -int hc_hipCtxSetCacheConfig (hashcat_ctx_t *hashcat_ctx, HIPfunc_cache config); -int hc_hipCtxSynchronize (hashcat_ctx_t *hashcat_ctx); -int hc_hipDeviceGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, HIPdevice_attribute attrib, HIPdevice dev); -int hc_hipDeviceGetCount (hashcat_ctx_t *hashcat_ctx, int *count); -int hc_hipDeviceGet (hashcat_ctx_t *hashcat_ctx, HIPdevice *device, int ordinal); -int hc_hipDeviceGetName (hashcat_ctx_t *hashcat_ctx, char *name, int len, HIPdevice dev); -int hc_hipDeviceTotalMem (hashcat_ctx_t *hashcat_ctx, size_t *bytes, HIPdevice dev); -int hc_hipDriverGetVersion (hashcat_ctx_t *hashcat_ctx, int *driverVersion); -int hc_hipEventCreate (hashcat_ctx_t *hashcat_ctx, HIPevent *phEvent, unsigned int Flags); -int hc_hipEventDestroy (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent); -int hc_hipEventElapsedTime (hashcat_ctx_t *hashcat_ctx, float *pMilliseconds, HIPevent hStart, HIPevent hEnd); -int hc_hipEventQuery (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent); -int hc_hipEventRecord (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent, 
HIPstream hStream); -int hc_hipEventSynchronize (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent); -int hc_hipFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, HIPfunction_attribute attrib, HIPfunction hfunc); -int hc_hipFuncSetAttribute (hashcat_ctx_t *hashcat_ctx, HIPfunction hfunc, HIPfunction_attribute attrib, int value); -int hc_hipInit (hashcat_ctx_t *hashcat_ctx, unsigned int Flags); -int hc_hipLaunchKernel (hashcat_ctx_t *hashcat_ctx, HIPfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, HIPstream hStream, void **kernelParams, void **extra); -int hc_hipMemAlloc (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr *dptr, size_t bytesize); -int hc_hipMemcpyDtoD (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, HIPdeviceptr srcDevice, size_t ByteCount); -int hc_hipMemcpyDtoH (hashcat_ctx_t *hashcat_ctx, void *dstHost, HIPdeviceptr srcDevice, size_t ByteCount); -int hc_hipMemcpyHtoD (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, const void *srcHost, size_t ByteCount); -int hc_hipMemFree (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dptr); -int hc_hipModuleGetFunction (hashcat_ctx_t *hashcat_ctx, HIPfunction *hfunc, HIPmodule hmod, const char *name); -int hc_hipModuleLoadDataEx (hashcat_ctx_t *hashcat_ctx, HIPmodule *module, const void *image, unsigned int numOptions, HIPjit_option *options, void **optionValues); -int hc_hipModuleUnload (hashcat_ctx_t *hashcat_ctx, HIPmodule hmod); -int hc_hipStreamCreate (hashcat_ctx_t *hashcat_ctx, HIPstream *phStream, unsigned int Flags); -int hc_hipStreamDestroy (hashcat_ctx_t *hashcat_ctx, HIPstream hStream); -int hc_hipStreamSynchronize (hashcat_ctx_t *hashcat_ctx, HIPstream hStream); -int hc_hipCtxPushCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx); -int hc_hipCtxPopCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext *pctx); -int hc_hipLinkCreate (hashcat_ctx_t *hashcat_ctx, unsigned int 
numOptions, HIPjit_option *options, void **optionValues, HIPlinkState *stateOut); -int hc_hipLinkAddData (hashcat_ctx_t *hashcat_ctx, HIPlinkState state, HIPjitInputType type, void *data, size_t size, const char *name, unsigned int numOptions, HIPjit_option *options, void **optionValues); -int hc_hipLinkDestroy (hashcat_ctx_t *hashcat_ctx, HIPlinkState state); -int hc_hipLinkComplete (hashcat_ctx_t *hashcat_ctx, HIPlinkState state, void **hipbinOut, size_t *sizeOut); - +int hc_hipCtxCreate (hashcat_ctx_t *hashcat_ctx, HIPcontext *pctx, unsigned int flags, HIPdevice dev); +int hc_hipCtxDestroy (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx); +int hc_hipCtxSetCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx); +int hc_hipCtxSetCacheConfig (hashcat_ctx_t *hashcat_ctx, HIPfunc_cache config); +int hc_hipCtxSynchronize (hashcat_ctx_t *hashcat_ctx); +int hc_hipDeviceGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, HIPdevice_attribute attrib, HIPdevice dev); +int hc_hipDeviceGetCount (hashcat_ctx_t *hashcat_ctx, int *count); +int hc_hipDeviceGet (hashcat_ctx_t *hashcat_ctx, HIPdevice *device, int ordinal); +int hc_hipDeviceGetName (hashcat_ctx_t *hashcat_ctx, char *name, int len, HIPdevice dev); +int hc_hipDeviceTotalMem (hashcat_ctx_t *hashcat_ctx, size_t *bytes, HIPdevice dev); +int hc_hipDriverGetVersion (hashcat_ctx_t *hashcat_ctx, int *driverVersion); +int hc_hipEventCreate (hashcat_ctx_t *hashcat_ctx, HIPevent *phEvent, unsigned int Flags); +int hc_hipEventDestroy (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent); +int hc_hipEventElapsedTime (hashcat_ctx_t *hashcat_ctx, float *pMilliseconds, HIPevent hStart, HIPevent hEnd); +int hc_hipEventQuery (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent); +int hc_hipEventRecord (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent, HIPstream hStream); +int hc_hipEventSynchronize (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent); +int hc_hipFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, HIPfunction_attribute attrib, HIPfunction hfunc); 
+int hc_hipFuncSetAttribute (hashcat_ctx_t *hashcat_ctx, HIPfunction hfunc, HIPfunction_attribute attrib, int value); +int hc_hipInit (hashcat_ctx_t *hashcat_ctx, unsigned int Flags); +int hc_hipLaunchKernel (hashcat_ctx_t *hashcat_ctx, HIPfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, HIPstream hStream, void **kernelParams, void **extra); +int hc_hipMemAlloc (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr *dptr, size_t bytesize); +int hc_hipMemcpyDtoD (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, HIPdeviceptr srcDevice, size_t ByteCount); +int hc_hipMemcpyDtoH (hashcat_ctx_t *hashcat_ctx, void *dstHost, HIPdeviceptr srcDevice, size_t ByteCount); +int hc_hipMemcpyHtoD (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, const void *srcHost, size_t ByteCount); +int hc_hipMemFree (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dptr); +int hc_hipModuleGetFunction (hashcat_ctx_t *hashcat_ctx, HIPfunction *hfunc, HIPmodule hmod, const char *name); +int hc_hipModuleLoadDataEx (hashcat_ctx_t *hashcat_ctx, HIPmodule *module, const void *image, unsigned int numOptions, HIPjit_option *options, void **optionValues); +int hc_hipModuleUnload (hashcat_ctx_t *hashcat_ctx, HIPmodule hmod); +int hc_hipStreamCreate (hashcat_ctx_t *hashcat_ctx, HIPstream *phStream, unsigned int Flags); +int hc_hipStreamDestroy (hashcat_ctx_t *hashcat_ctx, HIPstream hStream); +int hc_hipStreamSynchronize (hashcat_ctx_t *hashcat_ctx, HIPstream hStream); +int hc_hipCtxPushCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx); +int hc_hipCtxPopCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext *pctx); +int hc_hipLinkCreate (hashcat_ctx_t *hashcat_ctx, unsigned int numOptions, HIPjit_option *options, void **optionValues, HIPlinkState *stateOut); +int hc_hipLinkAddData (hashcat_ctx_t *hashcat_ctx, HIPlinkState state, HIPjitInputType type, void *data, size_t size, const char *name, 
unsigned int numOptions, HIPjit_option *options, void **optionValues); +int hc_hipLinkDestroy (hashcat_ctx_t *hashcat_ctx, HIPlinkState state); +int hc_hipLinkComplete (hashcat_ctx_t *hashcat_ctx, HIPlinkState state, void **cubinOut, size_t *sizeOut); int hc_clBuildProgram (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, void (CL_CALLBACK *pfn_notify) (cl_program program, void *user_data), void *user_data); int hc_clCompileProgram (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, cl_uint num_input_headers, const cl_program *input_headers, const char **header_include_names, void (CL_CALLBACK *pfn_notify) (cl_program program, void *user_data), void *user_data); @@ -177,19 +175,20 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, void rebuild_pws_compressed_append (hc_device_param_t *device_param, const u64 pws_cnt, const u8 chr); -int run_cuda_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num); -int run_cuda_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num); -int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u32 value, const u64 size); -int run_cuda_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 size); +int run_cuda_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num); +int run_cuda_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num); +int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u32 value, const u64 size); +int run_cuda_kernel_bzero (hashcat_ctx_t *hashcat_ctx, 
hc_device_param_t *device_param, CUdeviceptr buf, const u64 size); -int run_hip_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 num); -int run_hip_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u32 value, const u64 size); -int run_hip_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 size); +int run_hip_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 num); +int run_hip_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 num); +int run_hip_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u32 value, const u64 size); +int run_hip_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 size); -int run_opencl_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num); +int run_opencl_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num); int run_opencl_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num); -int run_opencl_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u32 value, const u64 size); -int run_opencl_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 size); +int run_opencl_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u32 value, const u64 size); +int run_opencl_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 size); int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kern_run, const u64 pws_pos, const u64 num, const u32 
event_update, const u32 iteration); int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kern_run, const u64 num); diff --git a/include/types.h b/include/types.h index 5d9d611c7..037e23d34 100644 --- a/include/types.h +++ b/include/types.h @@ -184,6 +184,7 @@ typedef enum vendor_id VENDOR_ID_NV = (1U << 5), VENDOR_ID_POCL = (1U << 6), VENDOR_ID_AMD_USE_INTEL = (1U << 7), + VENDOR_ID_AMD_USE_HIP = (1U << 8), VENDOR_ID_GENERIC = (1U << 31) } vendor_id_t; @@ -696,114 +697,114 @@ typedef enum user_options_map IDX_ATTACK_MODE = 'a', IDX_BACKEND_DEVICES = 'd', IDX_BACKEND_IGNORE_CUDA = 0xff01, - IDX_BACKEND_IGNORE_HIP = 0xff4d, - IDX_BACKEND_IGNORE_OPENCL = 0xff02, + IDX_BACKEND_IGNORE_HIP = 0xff02, + IDX_BACKEND_IGNORE_OPENCL = 0xff03, IDX_BACKEND_INFO = 'I', - IDX_BACKEND_VECTOR_WIDTH = 0xff03, - IDX_BENCHMARK_ALL = 0xff04, + IDX_BACKEND_VECTOR_WIDTH = 0xff04, + IDX_BENCHMARK_ALL = 0xff05, IDX_BENCHMARK = 'b', - IDX_BITMAP_MAX = 0xff05, - IDX_BITMAP_MIN = 0xff06, + IDX_BITMAP_MAX = 0xff06, + IDX_BITMAP_MIN = 0xff07, #ifdef WITH_BRAIN IDX_BRAIN_CLIENT = 'z', - IDX_BRAIN_CLIENT_FEATURES = 0xff07, - IDX_BRAIN_HOST = 0xff08, - IDX_BRAIN_PASSWORD = 0xff09, - IDX_BRAIN_PORT = 0xff0a, - IDX_BRAIN_SERVER = 0xff0b, - IDX_BRAIN_SERVER_TIMER = 0xff0c, - IDX_BRAIN_SESSION = 0xff0d, - IDX_BRAIN_SESSION_WHITELIST = 0xff0e, + IDX_BRAIN_CLIENT_FEATURES = 0xff08, + IDX_BRAIN_HOST = 0xff09, + IDX_BRAIN_PASSWORD = 0xff0a, + IDX_BRAIN_PORT = 0xff0b, + IDX_BRAIN_SERVER = 0xff0c, + IDX_BRAIN_SERVER_TIMER = 0xff0d, + IDX_BRAIN_SESSION = 0xff0e, + IDX_BRAIN_SESSION_WHITELIST = 0xff0f, #endif - IDX_CPU_AFFINITY = 0xff0f, + IDX_CPU_AFFINITY = 0xff10, IDX_CUSTOM_CHARSET_1 = '1', IDX_CUSTOM_CHARSET_2 = '2', IDX_CUSTOM_CHARSET_3 = '3', IDX_CUSTOM_CHARSET_4 = '4', - IDX_DEBUG_FILE = 0xff10, - IDX_DEBUG_MODE = 0xff11, - IDX_ENCODING_FROM = 0xff12, - IDX_ENCODING_TO = 0xff13, - IDX_HASH_INFO = 0xff14, - IDX_FORCE = 0xff15, - IDX_HWMON_DISABLE = 0xff16, - 
IDX_HWMON_TEMP_ABORT = 0xff17, + IDX_DEBUG_FILE = 0xff11, + IDX_DEBUG_MODE = 0xff12, + IDX_ENCODING_FROM = 0xff13, + IDX_ENCODING_TO = 0xff14, + IDX_HASH_INFO = 0xff15, + IDX_FORCE = 0xff16, + IDX_HWMON_DISABLE = 0xff17, + IDX_HWMON_TEMP_ABORT = 0xff18, IDX_HASH_MODE = 'm', - IDX_HCCAPX_MESSAGE_PAIR = 0xff18, + IDX_HCCAPX_MESSAGE_PAIR = 0xff19, IDX_HELP = 'h', - IDX_HEX_CHARSET = 0xff19, - IDX_HEX_SALT = 0xff1a, - IDX_HEX_WORDLIST = 0xff1b, - IDX_HOOK_THREADS = 0xff1c, - IDX_IDENTIFY = 0xff1d, + IDX_HEX_CHARSET = 0xff1a, + IDX_HEX_SALT = 0xff1b, + IDX_HEX_WORDLIST = 0xff1c, + IDX_HOOK_THREADS = 0xff1d, + IDX_IDENTIFY = 0xff1e, IDX_INCREMENT = 'i', - IDX_INCREMENT_MAX = 0xff1e, - IDX_INCREMENT_MIN = 0xff1f, - IDX_INDUCTION_DIR = 0xff20, - IDX_KEEP_GUESSING = 0xff21, + IDX_INCREMENT_MAX = 0xff1f, + IDX_INCREMENT_MIN = 0xff20, + IDX_INDUCTION_DIR = 0xff21, + IDX_KEEP_GUESSING = 0xff22, IDX_KERNEL_ACCEL = 'n', IDX_KERNEL_LOOPS = 'u', IDX_KERNEL_THREADS = 'T', - IDX_KEYBOARD_LAYOUT_MAPPING = 0xff22, - IDX_KEYSPACE = 0xff23, - IDX_LEFT = 0xff24, + IDX_KEYBOARD_LAYOUT_MAPPING = 0xff23, + IDX_KEYSPACE = 0xff24, + IDX_LEFT = 0xff25, IDX_LIMIT = 'l', - IDX_LOGFILE_DISABLE = 0xff25, - IDX_LOOPBACK = 0xff26, - IDX_MACHINE_READABLE = 0xff27, - IDX_MARKOV_CLASSIC = 0xff28, - IDX_MARKOV_DISABLE = 0xff29, - IDX_MARKOV_HCSTAT2 = 0xff2a, - IDX_MARKOV_INVERSE = 0xff2b, + IDX_LOGFILE_DISABLE = 0xff26, + IDX_LOOPBACK = 0xff27, + IDX_MACHINE_READABLE = 0xff28, + IDX_MARKOV_CLASSIC = 0xff29, + IDX_MARKOV_DISABLE = 0xff2a, + IDX_MARKOV_HCSTAT2 = 0xff2b, + IDX_MARKOV_INVERSE = 0xff2c, IDX_MARKOV_THRESHOLD = 't', - IDX_NONCE_ERROR_CORRECTIONS = 0xff2c, + IDX_NONCE_ERROR_CORRECTIONS = 0xff2d, IDX_OPENCL_DEVICE_TYPES = 'D', IDX_OPTIMIZED_KERNEL_ENABLE = 'O', - IDX_OUTFILE_AUTOHEX_DISABLE = 0xff2d, - IDX_OUTFILE_CHECK_DIR = 0xff2e, - IDX_OUTFILE_CHECK_TIMER = 0xff2f, - IDX_OUTFILE_FORMAT = 0xff30, + IDX_OUTFILE_AUTOHEX_DISABLE = 0xff2e, + IDX_OUTFILE_CHECK_DIR = 0xff2f, + 
IDX_OUTFILE_CHECK_TIMER = 0xff30, + IDX_OUTFILE_FORMAT = 0xff31, IDX_OUTFILE = 'o', - IDX_POTFILE_DISABLE = 0xff31, - IDX_POTFILE_PATH = 0xff32, - IDX_PROGRESS_ONLY = 0xff33, - IDX_QUIET = 0xff34, - IDX_REMOVE = 0xff35, - IDX_REMOVE_TIMER = 0xff36, - IDX_RESTORE = 0xff37, - IDX_RESTORE_DISABLE = 0xff38, - IDX_RESTORE_FILE_PATH = 0xff39, + IDX_POTFILE_DISABLE = 0xff32, + IDX_POTFILE_PATH = 0xff33, + IDX_PROGRESS_ONLY = 0xff34, + IDX_QUIET = 0xff35, + IDX_REMOVE = 0xff36, + IDX_REMOVE_TIMER = 0xff37, + IDX_RESTORE = 0xff38, + IDX_RESTORE_DISABLE = 0xff39, + IDX_RESTORE_FILE_PATH = 0xff3a, IDX_RP_FILE = 'r', - IDX_RP_GEN_FUNC_MAX = 0xff3a, - IDX_RP_GEN_FUNC_MIN = 0xff3b, + IDX_RP_GEN_FUNC_MAX = 0xff3b, + IDX_RP_GEN_FUNC_MIN = 0xff3c, IDX_RP_GEN = 'g', - IDX_RP_GEN_SEED = 0xff3c, + IDX_RP_GEN_SEED = 0xff3d, IDX_RULE_BUF_L = 'j', IDX_RULE_BUF_R = 'k', - IDX_RUNTIME = 0xff3d, - IDX_SCRYPT_TMTO = 0xff3e, + IDX_RUNTIME = 0xff3e, + IDX_SCRYPT_TMTO = 0xff3f, IDX_SEGMENT_SIZE = 'c', - IDX_SELF_TEST_DISABLE = 0xff3f, + IDX_SELF_TEST_DISABLE = 0xff40, IDX_SEPARATOR = 'p', - IDX_SESSION = 0xff40, - IDX_SHOW = 0xff41, + IDX_SESSION = 0xff41, + IDX_SHOW = 0xff42, IDX_SKIP = 's', IDX_SLOW_CANDIDATES = 'S', - IDX_SPEED_ONLY = 0xff42, - IDX_SPIN_DAMP = 0xff43, - IDX_STATUS = 0xff44, - IDX_STATUS_JSON = 0xff45, - IDX_STATUS_TIMER = 0xff46, - IDX_STDOUT_FLAG = 0xff47, - IDX_STDIN_TIMEOUT_ABORT = 0xff48, - IDX_TRUECRYPT_KEYFILES = 0xff49, - IDX_USERNAME = 0xff4a, - IDX_VERACRYPT_KEYFILES = 0xff4b, - IDX_VERACRYPT_PIM_START = 0xff4c, - IDX_VERACRYPT_PIM_STOP = 0xff4d, + IDX_SPEED_ONLY = 0xff43, + IDX_SPIN_DAMP = 0xff44, + IDX_STATUS = 0xff45, + IDX_STATUS_JSON = 0xff46, + IDX_STATUS_TIMER = 0xff47, + IDX_STDOUT_FLAG = 0xff48, + IDX_STDIN_TIMEOUT_ABORT = 0xff49, + IDX_TRUECRYPT_KEYFILES = 0xff4a, + IDX_USERNAME = 0xff4b, + IDX_VERACRYPT_KEYFILES = 0xff4c, + IDX_VERACRYPT_PIM_START = 0xff4d, + IDX_VERACRYPT_PIM_STOP = 0xff4e, IDX_VERSION_LOWER = 'v', IDX_VERSION = 'V', - 
IDX_WORDLIST_AUTOHEX_DISABLE = 0xff4e, + IDX_WORDLIST_AUTOHEX_DISABLE = 0xff4f, IDX_WORKLOAD_PROFILE = 'w', } user_options_map_t; @@ -1485,82 +1486,83 @@ typedef struct hc_device_param // API: hip - bool is_hip; + bool is_hip; - int hip_warp_size; + int hip_warp_size; - HIPdevice hip_device; - HIPcontext hip_context; - HIPstream hip_stream; + HIPdevice hip_device; + HIPcontext hip_context; + HIPstream hip_stream; - HIPevent hip_event1; - HIPevent hip_event2; + HIPevent hip_event1; + HIPevent hip_event2; - HIPmodule hip_module; - HIPmodule hip_module_shared; - HIPmodule hip_module_mp; - HIPmodule hip_module_amp; + HIPmodule hip_module; + HIPmodule hip_module_shared; + HIPmodule hip_module_mp; + HIPmodule hip_module_amp; - HIPfunction hip_function1; - HIPfunction hip_function12; - HIPfunction hip_function2; - HIPfunction hip_function2e; - HIPfunction hip_function23; - HIPfunction hip_function3; - HIPfunction hip_function4; - HIPfunction hip_function_init2; - HIPfunction hip_function_loop2; - HIPfunction hip_function_mp; - HIPfunction hip_function_mp_l; - HIPfunction hip_function_mp_r; - HIPfunction hip_function_amp; - HIPfunction hip_function_tm; - HIPfunction hip_function_memset; - HIPfunction hip_function_atinit; - HIPfunction hip_function_decompress; - HIPfunction hip_function_aux1; - HIPfunction hip_function_aux2; - HIPfunction hip_function_aux3; - HIPfunction hip_function_aux4; + HIPfunction hip_function1; + HIPfunction hip_function12; + HIPfunction hip_function2p; + HIPfunction hip_function2; + HIPfunction hip_function2e; + HIPfunction hip_function23; + HIPfunction hip_function3; + HIPfunction hip_function4; + HIPfunction hip_function_init2; + HIPfunction hip_function_loop2p; + HIPfunction hip_function_loop2; + HIPfunction hip_function_mp; + HIPfunction hip_function_mp_l; + HIPfunction hip_function_mp_r; + HIPfunction hip_function_amp; + HIPfunction hip_function_tm; + HIPfunction hip_function_memset; + HIPfunction hip_function_atinit; + HIPfunction 
hip_function_utf8toutf16le; + HIPfunction hip_function_decompress; + HIPfunction hip_function_aux1; + HIPfunction hip_function_aux2; + HIPfunction hip_function_aux3; + HIPfunction hip_function_aux4; - HIPdeviceptr hip_d_pws_buf; - HIPdeviceptr hip_d_pws_amp_buf; - HIPdeviceptr hip_d_pws_comp_buf; - HIPdeviceptr hip_d_pws_idx; - HIPdeviceptr hip_d_words_buf_l; - HIPdeviceptr hip_d_words_buf_r; - HIPdeviceptr hip_d_rules; - HIPdeviceptr hip_d_rules_c; - HIPdeviceptr hip_d_combs; - HIPdeviceptr hip_d_combs_c; - HIPdeviceptr hip_d_bfs; - HIPdeviceptr hip_d_bfs_c; - HIPdeviceptr hip_d_tm_c; - HIPdeviceptr hip_d_bitmap_s1_a; - HIPdeviceptr hip_d_bitmap_s1_b; - HIPdeviceptr hip_d_bitmap_s1_c; - HIPdeviceptr hip_d_bitmap_s1_d; - HIPdeviceptr hip_d_bitmap_s2_a; - HIPdeviceptr hip_d_bitmap_s2_b; - HIPdeviceptr hip_d_bitmap_s2_c; - HIPdeviceptr hip_d_bitmap_s2_d; - HIPdeviceptr hip_d_plain_bufs; - HIPdeviceptr hip_d_digests_buf; - HIPdeviceptr hip_d_digests_shown; - HIPdeviceptr hip_d_salt_bufs; - HIPdeviceptr hip_d_esalt_bufs; - HIPdeviceptr hip_d_tmps; - HIPdeviceptr hip_d_hooks; - HIPdeviceptr hip_d_result; - HIPdeviceptr hip_d_extra0_buf; - HIPdeviceptr hip_d_extra1_buf; - HIPdeviceptr hip_d_extra2_buf; - HIPdeviceptr hip_d_extra3_buf; - HIPdeviceptr hip_d_root_css_buf; - HIPdeviceptr hip_d_markov_css_buf; - HIPdeviceptr hip_d_st_digests_buf; - HIPdeviceptr hip_d_st_salts_buf; - HIPdeviceptr hip_d_st_esalts_buf; + HIPdeviceptr hip_d_pws_buf; + HIPdeviceptr hip_d_pws_amp_buf; + HIPdeviceptr hip_d_pws_comp_buf; + HIPdeviceptr hip_d_pws_idx; + HIPdeviceptr hip_d_rules; + HIPdeviceptr hip_d_rules_c; + HIPdeviceptr hip_d_combs; + HIPdeviceptr hip_d_combs_c; + HIPdeviceptr hip_d_bfs; + HIPdeviceptr hip_d_bfs_c; + HIPdeviceptr hip_d_tm_c; + HIPdeviceptr hip_d_bitmap_s1_a; + HIPdeviceptr hip_d_bitmap_s1_b; + HIPdeviceptr hip_d_bitmap_s1_c; + HIPdeviceptr hip_d_bitmap_s1_d; + HIPdeviceptr hip_d_bitmap_s2_a; + HIPdeviceptr hip_d_bitmap_s2_b; + HIPdeviceptr hip_d_bitmap_s2_c; + 
HIPdeviceptr hip_d_bitmap_s2_d; + HIPdeviceptr hip_d_plain_bufs; + HIPdeviceptr hip_d_digests_buf; + HIPdeviceptr hip_d_digests_shown; + HIPdeviceptr hip_d_salt_bufs; + HIPdeviceptr hip_d_esalt_bufs; + HIPdeviceptr hip_d_tmps; + HIPdeviceptr hip_d_hooks; + HIPdeviceptr hip_d_result; + HIPdeviceptr hip_d_extra0_buf; + HIPdeviceptr hip_d_extra1_buf; + HIPdeviceptr hip_d_extra2_buf; + HIPdeviceptr hip_d_extra3_buf; + HIPdeviceptr hip_d_root_css_buf; + HIPdeviceptr hip_d_markov_css_buf; + HIPdeviceptr hip_d_st_digests_buf; + HIPdeviceptr hip_d_st_salts_buf; + HIPdeviceptr hip_d_st_esalts_buf; // API: opencl @@ -1653,10 +1655,10 @@ typedef struct backend_ctx { bool enabled; - void *ocl; void *cuda; void *hip; - + void *ocl; + void *nvrtc; void *hiprtc; @@ -1667,6 +1669,7 @@ typedef struct backend_ctx int backend_devices_cnt; int backend_devices_active; + int cuda_devices_cnt; int cuda_devices_active; int hip_devices_cnt; @@ -1704,7 +1707,10 @@ typedef struct backend_ctx int nvrtc_driver_version; int cuda_driver_version; - // cuda + // hip + + int rc_hip_init; + int rc_hiprtc_init; int hiprtc_driver_version; int hip_driver_version; diff --git a/src/Makefile b/src/Makefile index acad8ddb4..e4832860c 100644 --- a/src/Makefile +++ b/src/Makefile @@ -4,7 +4,7 @@ ## SHARED ?= 0 -DEBUG := 1 +DEBUG := 0 PRODUCTION := 0 PRODUCTION_VERSION := v6.2.2 ENABLE_CUBIN ?= 1 diff --git a/src/autotune.c b/src/autotune.c index 04f8bc4c5..cbb1ff499 100644 --- a/src/autotune.c +++ b/src/autotune.c @@ -157,8 +157,9 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param const u32 kernel_power_max = device_param->hardware_power * kernel_accel_max; - int CL_rc; int CU_rc; + int HIP_rc; + int CL_rc; if (device_param->is_cuda == true) { @@ -167,6 +168,13 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (CU_rc == -1) return -1; } + if (device_param->is_hip == true) + { + HIP_rc = run_hip_kernel_atinit (hashcat_ctx, device_param, 
device_param->hip_d_pws_buf, kernel_power_max); + + if (HIP_rc == -1) return -1; + } + if (device_param->is_opencl == true) { CL_rc = run_opencl_kernel_atinit (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, kernel_power_max); @@ -190,6 +198,13 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (CU_rc == -1) return -1; } + if (device_param->is_hip == true) + { + HIP_rc = hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_rules_c, device_param->hip_d_rules, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t)); + + if (HIP_rc == -1) return -1; + } + if (device_param->is_opencl == true) { CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_rules, device_param->opencl_d_rules_c, 0, 0, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t), 0, NULL, NULL); @@ -383,6 +398,27 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (CU_rc == -1) return -1; } + if (device_param->is_hip == true) + { + int HIP_rc; + + HIP_rc = run_hip_kernel_memset (hashcat_ctx, device_param, device_param->hip_d_pws_buf, 0, device_param->size_pws); + + if (HIP_rc == -1) return -1; + + HIP_rc = run_hip_kernel_memset (hashcat_ctx, device_param, device_param->hip_d_plain_bufs, 0, device_param->size_plains); + + if (HIP_rc == -1) return -1; + + HIP_rc = run_hip_kernel_memset (hashcat_ctx, device_param, device_param->hip_d_digests_shown, 0, device_param->size_shown); + + if (HIP_rc == -1) return -1; + + HIP_rc = run_hip_kernel_memset (hashcat_ctx, device_param, device_param->hip_d_result, 0, device_param->size_results); + + if (HIP_rc == -1) return -1; + } + if (device_param->is_opencl == true) { int CL_rc; @@ -456,6 +492,11 @@ HC_API_CALL void *thread_autotune (void *p) if (rc_cuCtxSetCurrent == -1) return NULL; } + if (device_param->is_hip == true) + { + if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return NULL; + } + const 
int rc_autotune = autotune (hashcat_ctx, device_param); if (rc_autotune == -1) @@ -463,5 +504,10 @@ HC_API_CALL void *thread_autotune (void *p) // we should do something here, tell hashcat main that autotune failed to abort } + if (device_param->is_hip == true) + { + if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL; + } + return NULL; } diff --git a/src/backend.c b/src/backend.c index 20ec98a0e..5b12cd2c5 100644 --- a/src/backend.c +++ b/src/backend.c @@ -55,6 +55,10 @@ static bool is_same_device (const hc_device_param_t *src, const hc_device_param_ if ((src->is_cuda == true) && (dst->is_cuda == true)) return false; + // HIP can't have aliases + + if ((src->is_hip == true) && (dst->is_hip == true)) return false; + // But OpenCL can have aliases if ((src->is_opencl == true) && (dst->is_opencl == true)) @@ -119,6 +123,10 @@ static int backend_ctx_find_alias_devices (hashcat_ctx_t *hashcat_ctx) if (alias_device->is_cuda == true) continue; + // this lets HIP devices survive over OpenCL + + if (alias_device->is_hip == true) continue; + // this lets native OpenCL runtime survive over generic OpenCL runtime if (alias_device->opencl_device_type & CL_DEVICE_TYPE_CPU) @@ -153,6 +161,7 @@ static bool is_same_device_type (const hc_device_param_t *src, const hc_device_p if (strcmp (src->device_name, dst->device_name) != 0) return false; if (src->is_cuda != dst->is_cuda) return false; + if (src->is_hip != dst->is_hip) return false; if (src->is_opencl != dst->is_opencl) return false; if (strcmp (src->device_name, dst->device_name) != 0) return false; @@ -960,6 +969,213 @@ int hc_nvrtcVersion (hashcat_ctx_t *hashcat_ctx, int *major, int *minor) return 0; } +// HIPRTC + +int hiprtc_init (hashcat_ctx_t *hashcat_ctx) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIPRTC_PTR *hiprtc = (HIPRTC_PTR *) backend_ctx->hiprtc; + + memset (hiprtc, 0, sizeof (HIPRTC_PTR)); + + #if defined (_WIN) + hiprtc->lib = hc_dlopen ("fixme.dll"); + 
#elif defined (__APPLE__) + hiprtc->lib = hc_dlopen ("fixme.dylib"); + #elif defined (__CYGWIN__) + hiprtc->lib = hc_dlopen ("fixme.dll"); + #else + hiprtc->lib = hc_dlopen ("libamdhip64.so"); + + if (hiprtc->lib == NULL) hiprtc->lib = hc_dlopen ("libamdhip64.so.4"); + #endif + + if (hiprtc->lib == NULL) return -1; + + HC_LOAD_FUNC (hiprtc, hiprtcAddNameExpression, HIPRTC_HIPRTCADDNAMEEXPRESSION, HIPRTC, 1); + HC_LOAD_FUNC (hiprtc, hiprtcCompileProgram, HIPRTC_HIPRTCCOMPILEPROGRAM, HIPRTC, 1); + HC_LOAD_FUNC (hiprtc, hiprtcCreateProgram, HIPRTC_HIPRTCCREATEPROGRAM, HIPRTC, 1); + HC_LOAD_FUNC (hiprtc, hiprtcDestroyProgram, HIPRTC_HIPRTCDESTROYPROGRAM, HIPRTC, 1); + HC_LOAD_FUNC (hiprtc, hiprtcGetLoweredName, HIPRTC_HIPRTCGETLOWEREDNAME, HIPRTC, 1); + HC_LOAD_FUNC (hiprtc, hiprtcGetCode, HIPRTC_HIPRTCGETPTX, HIPRTC, 1); + HC_LOAD_FUNC (hiprtc, hiprtcGetCodeSize, HIPRTC_HIPRTCGETPTXSIZE, HIPRTC, 1); + HC_LOAD_FUNC (hiprtc, hiprtcGetProgramLog, HIPRTC_HIPRTCGETPROGRAMLOG, HIPRTC, 1); + HC_LOAD_FUNC (hiprtc, hiprtcGetProgramLogSize, HIPRTC_HIPRTCGETPROGRAMLOGSIZE, HIPRTC, 1); + HC_LOAD_FUNC (hiprtc, hiprtcGetErrorString, HIPRTC_HIPRTCGETERRORSTRING, HIPRTC, 1); + HC_LOAD_FUNC (hiprtc, hiprtcVersion, HIPRTC_HIPRTCVERSION, HIPRTC, 1); + + return 0; +} + +void hiprtc_close (hashcat_ctx_t *hashcat_ctx) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIPRTC_PTR *hiprtc = (HIPRTC_PTR *) backend_ctx->hiprtc; + + if (hiprtc) + { + if (hiprtc->lib) + { + hc_dlclose (hiprtc->lib); + } + + hcfree (backend_ctx->hiprtc); + + backend_ctx->hiprtc = NULL; + } +} + +int hc_hiprtcCreateProgram (hashcat_ctx_t *hashcat_ctx, hiprtcProgram *prog, const char *src, const char *name, int numHeaders, const char * const *headers, const char * const *includeNames) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIPRTC_PTR *hiprtc = (HIPRTC_PTR *) backend_ctx->hiprtc; + + const hiprtcResult HIPRTC_err = hiprtc->hiprtcCreateProgram (prog, src, name, numHeaders, 
headers, includeNames); + + if (HIPRTC_err != HIPRTC_SUCCESS) + { + event_log_error (hashcat_ctx, "hiprtcCreateProgram(): %s", hiprtc->hiprtcGetErrorString (HIPRTC_err)); + + return -1; + } + + return 0; +} + +int hc_hiprtcDestroyProgram (hashcat_ctx_t *hashcat_ctx, hiprtcProgram *prog) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIPRTC_PTR *hiprtc = (HIPRTC_PTR *) backend_ctx->hiprtc; + + const hiprtcResult HIPRTC_err = hiprtc->hiprtcDestroyProgram (prog); + + if (HIPRTC_err != HIPRTC_SUCCESS) + { + event_log_error (hashcat_ctx, "hiprtcDestroyProgram(): %s", hiprtc->hiprtcGetErrorString (HIPRTC_err)); + + return -1; + } + + return 0; +} + +int hc_hiprtcCompileProgram (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, int numOptions, const char * const *options) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIPRTC_PTR *hiprtc = (HIPRTC_PTR *) backend_ctx->hiprtc; + + #if 0 + for(int i =0; i< numOptions; i++) + printf("Option_%d = %s\n", i, options[i]); + #endif + + const hiprtcResult HIPRTC_err = hiprtc->hiprtcCompileProgram (prog, numOptions, options); + + if (HIPRTC_err != HIPRTC_SUCCESS) + { + event_log_error (hashcat_ctx, "hiprtcCompileProgram(): %s", hiprtc->hiprtcGetErrorString (HIPRTC_err)); + + return -1; + } + + return 0; +} + +int hc_hiprtcGetProgramLogSize (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, size_t *logSizeRet) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIPRTC_PTR *hiprtc = (HIPRTC_PTR *) backend_ctx->hiprtc; + + const hiprtcResult HIPRTC_err = hiprtc->hiprtcGetProgramLogSize (prog, logSizeRet); + + if (HIPRTC_err != HIPRTC_SUCCESS) + { + event_log_error (hashcat_ctx, "hiprtcGetProgramLogSize(): %s", hiprtc->hiprtcGetErrorString (HIPRTC_err)); + + return -1; + } + + return 0; +} + +int hc_hiprtcGetProgramLog (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, char *log) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIPRTC_PTR *hiprtc = (HIPRTC_PTR *) backend_ctx->hiprtc; 
+ + const hiprtcResult HIPRTC_err = hiprtc->hiprtcGetProgramLog (prog, log); + + if (HIPRTC_err != HIPRTC_SUCCESS) + { + event_log_error (hashcat_ctx, "hiprtcGetProgramLog(): %s", hiprtc->hiprtcGetErrorString (HIPRTC_err)); + + return -1; + } + + return 0; +} + +int hc_hiprtcGetCodeSize (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, size_t *ptxSizeRet) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIPRTC_PTR *hiprtc = (HIPRTC_PTR *) backend_ctx->hiprtc; + + const hiprtcResult HIPRTC_err = hiprtc->hiprtcGetCodeSize (prog, ptxSizeRet); + + if (HIPRTC_err != HIPRTC_SUCCESS) + { + event_log_error (hashcat_ctx, "hiprtcGetCodeSize(): %s", hiprtc->hiprtcGetErrorString (HIPRTC_err)); + + return -1; + } + + return 0; +} + +int hc_hiprtcGetCode (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, char *ptx) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIPRTC_PTR *hiprtc = (HIPRTC_PTR *) backend_ctx->hiprtc; + + const hiprtcResult HIPRTC_err = hiprtc->hiprtcGetCode (prog, ptx); + + if (HIPRTC_err != HIPRTC_SUCCESS) + { + event_log_error (hashcat_ctx, "hiprtcGetCode(): %s", hiprtc->hiprtcGetErrorString (HIPRTC_err)); + + return -1; + } + + return 0; +} + +int hc_hiprtcVersion (hashcat_ctx_t *hashcat_ctx, int *major, int *minor) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIPRTC_PTR *hiprtc = (HIPRTC_PTR *) backend_ctx->hiprtc; + + const hiprtcResult HIPRTC_err = hiprtc->hiprtcVersion (major, minor); + + if (HIPRTC_err != HIPRTC_SUCCESS) + { + event_log_error (hashcat_ctx, "hiprtcVersion(): %s", hiprtc->hiprtcGetErrorString (HIPRTC_err)); + + return -1; + } + + return 0; +} + // CUDA int cuda_init (hashcat_ctx_t *hashcat_ctx) @@ -2167,6 +2383,1215 @@ int hc_cuLinkComplete (hashcat_ctx_t *hashcat_ctx, CUlinkState state, void **cub return 0; } +// HIP + +int hip_init (hashcat_ctx_t *hashcat_ctx) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + memset (hip, 0, 
sizeof (HIP_PTR)); + + #if defined (_WIN) + hip->lib = hc_dlopen ("fixme.dll"); + #elif defined (__APPLE__) + hip->lib = hc_dlopen ("fixme.dylib"); + #elif defined (__CYGWIN__) + hip->lib = hc_dlopen ("fixme.dll"); + #else + hip->lib = hc_dlopen ("libamdhip64.so"); + + //TODO: grab the 4 from the major RT version + if (hip->lib == NULL) hip->lib = hc_dlopen ("libamdhip64.so.4.2.40200"); + #endif + + if (hip->lib == NULL) return -1; + + // finding the right symbol is a PITA, + #define HC_LOAD_FUNC_HIP(ptr,name,hipname,type,libname,noerr) \ + do { \ + ptr->name = (type) hc_dlsym ((ptr)->lib, #hipname); \ + if ((noerr) != -1) { \ + if (!(ptr)->name) { \ + if ((noerr) == 1) { \ + event_log_error (hashcat_ctx, "%s is missing from %s shared library.", #name, #libname); \ + return -1; \ + } \ + if ((noerr) != 1) { \ + event_log_warning (hashcat_ctx, "%s is missing from %s shared library.", #name, #libname); \ + return 0; \ + } \ + } \ + } \ + } while (0) + + // finding the right symbol is a PITA, because of the _v2 suffix + // a good reference is cuda.h itself + // this needs to be verified for each new cuda release + + HC_LOAD_FUNC_HIP (hip, hipCtxCreate, hipCtxCreate, HIP_HIPCTXCREATE, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipCtxDestroy, hipCtxDestroy, HIP_HIPCTXDESTROY, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipCtxGetCacheConfig, hipCtxGetCacheConfig, HIP_HIPCTXGETCACHECONFIG, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipCtxGetCurrent, hipCtxGetCurrent, HIP_HIPCTXGETCURRENT, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipCtxGetSharedMemConfig, hipCtxGetSharedMemConfig, HIP_HIPCTXGETSHAREDMEMCONFIG, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipCtxPopCurrent, hipCtxPopCurrent, HIP_HIPCTXPOPCURRENT, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipCtxPushCurrent, hipCtxPushCurrent, HIP_HIPCTXPUSHCURRENT, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipCtxSetCacheConfig, hipCtxSetCacheConfig, HIP_HIPCTXSETCACHECONFIG, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipCtxSetCurrent, hipCtxSetCurrent, HIP_HIPCTXSETCURRENT, HIP, 1); + 
HC_LOAD_FUNC_HIP (hip, hipCtxSetSharedMemConfig, hipCtxSetSharedMemConfig, HIP_HIPCTXSETSHAREDMEMCONFIG, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipCtxSynchronize, hipCtxSynchronize, HIP_HIPCTXSYNCHRONIZE, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipDeviceGetAttribute, hipDeviceGetAttribute, HIP_HIPDEVICEGETATTRIBUTE, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipDeviceGetCount, hipGetDeviceCount, HIP_HIPDEVICEGETCOUNT, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipDeviceGet, hipDeviceGet, HIP_HIPDEVICEGET, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipDeviceGetName, hipDeviceGetName, HIP_HIPDEVICEGETNAME, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipDeviceTotalMem, hipDeviceTotalMem, HIP_HIPDEVICETOTALMEM, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipDriverGetVersion, hipDriverGetVersion, HIP_HIPDRIVERGETVERSION, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipEventCreate, hipEventCreateWithFlags, HIP_HIPEVENTCREATE, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipEventDestroy, hipEventDestroy, HIP_HIPEVENTDESTROY, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipEventElapsedTime, hipEventElapsedTime, HIP_HIPEVENTELAPSEDTIME, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipEventQuery, hipEventQuery, HIP_HIPEVENTQUERY, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipEventRecord, hipEventRecord, HIP_HIPEVENTRECORD, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipEventSynchronize, hipEventSynchronize, HIP_HIPEVENTSYNCHRONIZE, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipFuncGetAttribute, hipFuncGetAttribute, HIP_HIPFUNCGETATTRIBUTE, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipFuncSetAttribute, hipFuncSetAttribute, HIP_HIPFUNCSETATTRIBUTE, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipFuncSetCacheConfig, hipFuncSetCacheConfig, HIP_HIPFUNCSETCACHECONFIG, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipFuncSetSharedMemConfig, hipFuncSetSharedMemConfig, HIP_HIPFUNCSETSHAREDMEMCONFIG, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipGetErrorName, hipGetErrorName, HIP_HIPGETERRORNAME, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipGetErrorString, hipGetErrorString, HIP_HIPGETERRORSTRING, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipInit, hipInit, 
HIP_HIPINIT, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipLaunchKernel, hipModuleLaunchKernel, HIP_HIPLAUNCHKERNEL, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipMemAlloc, hipMalloc, HIP_HIPMEMALLOC, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipMemAllocHost, hipMemAllocHost, HIP_HIPMEMALLOCHOST, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoD, hipMemcpyDtoD, HIP_HIPMEMCPYDTOD, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoH, hipMemcpyDtoH, HIP_HIPMEMCPYDTOH, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipMemcpyHtoD, hipMemcpyHtoD, HIP_HIPMEMCPYHTOD, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipMemFree, hipFree, HIP_HIPMEMFREE, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipMemFreeHost, hipFreeHost, HIP_HIPMEMFREEHOST, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipMemGetInfo, hipMemGetInfo, HIP_HIPMEMGETINFO, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipMemsetD32, hipMemsetD32, HIP_HIPMEMSETD32, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipMemsetD8, hipMemsetD8, HIP_HIPMEMSETD8, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipModuleGetFunction, hipModuleGetFunction, HIP_HIPMODULEGETFUNCTION, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipModuleGetGlobal, hipModuleGetGlobal, HIP_HIPMODULEGETGLOBAL, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipModuleLoad, hipModuleLoad, HIP_HIPMODULELOAD, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipModuleLoadData, hipModuleLoadData, HIP_HIPMODULELOADDATA, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipModuleLoadDataEx, hipModuleLoadDataEx, HIP_HIPMODULELOADDATAEX, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipModuleUnload, hipModuleUnload, HIP_HIPMODULEUNLOAD, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipProfilerStart, hipProfilerStart, HIP_HIPPROFILERSTART, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipProfilerStop, hipProfilerStop, HIP_HIPPROFILERSTOP, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipStreamCreate, hipStreamCreate, HIP_HIPSTREAMCREATE, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipStreamDestroy, hipStreamDestroy, HIP_HIPSTREAMDESTROY, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipStreamSynchronize, hipStreamSynchronize, HIP_HIPSTREAMSYNCHRONIZE, HIP, 1); + HC_LOAD_FUNC_HIP (hip, 
hipStreamWaitEvent, hipStreamWaitEvent, HIP_HIPSTREAMWAITEVENT, HIP, 1); + #if defined (WITH_CUBINX) + HC_LOAD_FUNC_HIP (hip, hipLinkCreate, hipLinkCreate, HIP_HIPLINKCREATE, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipLinkAddData, hipLinkAddData, HIP_HIPLINKADDDATA, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipLinkDestroy, hipLinkDestroy, HIP_HIPLINKDESTROY, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipLinkComplete, hipLinkComplete, HIP_HIPLINKCOMPLETE, HIP, 1); + #endif + + return 0; +} + +void hip_close (hashcat_ctx_t *hashcat_ctx) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + if (hip) + { + if (hip->lib) + { + hc_dlclose (hip->lib); + } + + hcfree (backend_ctx->hip); + + backend_ctx->hip = NULL; + } +} + +int hc_hipInit (hashcat_ctx_t *hashcat_ctx, unsigned int Flags) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipInit (Flags); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipInit(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipInit(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipDeviceGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, HIPdevice_attribute attrib, HIPdevice dev) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipDeviceGetAttribute (pi, attrib, dev); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipDeviceGetAttribute(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipDeviceGetAttribute(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipDeviceGetCount (hashcat_ctx_t *hashcat_ctx, int *count) +{ + 
backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipDeviceGetCount (count); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipDeviceGetCount(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipDeviceGetCount(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipDeviceGet (hashcat_ctx_t *hashcat_ctx, HIPdevice* device, int ordinal) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipDeviceGet (device, ordinal); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipDeviceGet(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipDeviceGet(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipDeviceGetName (hashcat_ctx_t *hashcat_ctx, char *name, int len, HIPdevice dev) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipDeviceGetName (name, len, dev); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipDeviceGetName(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipDeviceGetName(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipDeviceTotalMem (hashcat_ctx_t *hashcat_ctx, size_t *bytes, HIPdevice dev) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipDeviceTotalMem (bytes, dev); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if 
(hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipDeviceTotalMem(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipDeviceTotalMem(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipDriverGetVersion (hashcat_ctx_t *hashcat_ctx, int *driverVersion) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipDriverGetVersion (driverVersion); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipDriverGetVersion(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipDriverGetVersion(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipCtxCreate (hashcat_ctx_t *hashcat_ctx, HIPcontext *pctx, unsigned int flags, HIPdevice dev) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipCtxCreate (pctx, flags, dev); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipCtxCreate(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipCtxCreate(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipCtxDestroy (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipCtxDestroy (ctx); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipCtxDestroy(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipCtxDestroy(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int 
hc_hipModuleLoadDataEx (hashcat_ctx_t *hashcat_ctx, HIPmodule *module, const void *image, unsigned int numOptions, HIPjit_option *options, void **optionValues) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipModuleLoadDataEx (module, image, numOptions, options, optionValues); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipModuleLoadDataEx(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipModuleLoadDataEx(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipModuleUnload (hashcat_ctx_t *hashcat_ctx, HIPmodule hmod) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipModuleUnload (hmod); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipModuleUnload(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipModuleUnload(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipCtxSetCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipCtxSetCurrent (ctx); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipCtxSetCurrent(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipCtxSetCurrent(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipMemAlloc (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr *dptr, size_t bytesize) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) 
backend_ctx->hip; + + const HIPresult HIP_err = hip->hipMemAlloc (dptr, bytesize); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipMemAlloc(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipMemAlloc(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipMemFree (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dptr) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipMemFree (dptr); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipMemFree(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipMemFree(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipMemcpyDtoH (hashcat_ctx_t *hashcat_ctx, void *dstHost, HIPdeviceptr srcDevice, size_t ByteCount) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipMemcpyDtoH (dstHost, srcDevice, ByteCount); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipMemcpyDtoH(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipMemcpyDtoH(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipMemcpyDtoD (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, HIPdeviceptr srcDevice, size_t ByteCount) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipMemcpyDtoD (dstDevice, srcDevice, ByteCount); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + 
event_log_error (hashcat_ctx, "hipMemcpyDtoD(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipMemcpyDtoD(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipMemcpyHtoD (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, const void *srcHost, size_t ByteCount) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipMemcpyHtoD (dstDevice, srcHost, ByteCount); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipMemcpyHtoD(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipMemcpyHtoD(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipModuleGetFunction (hashcat_ctx_t *hashcat_ctx, HIPfunction *hfunc, HIPmodule hmod, const char *name) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipModuleGetFunction (hfunc, hmod, name); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipModuleGetFunction(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipModuleGetFunction(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipModuleGetGlobal (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr *dptr, size_t *bytes, HIPmodule hmod, const char *name) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipModuleGetGlobal (dptr, bytes, hmod, name); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipModuleGetGlobal(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, 
"hipModuleGetGlobal(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipMemGetInfo (hashcat_ctx_t *hashcat_ctx, size_t *free, size_t *total) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipMemGetInfo (free, total); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipMemGetInfo(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipMemGetInfo(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, HIPfunction_attribute attrib, HIPfunction hfunc) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipFuncGetAttribute (pi, attrib, hfunc); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipFuncGetAttribute(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipFuncGetAttribute(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipFuncSetAttribute (hashcat_ctx_t *hashcat_ctx, HIPfunction hfunc, HIPfunction_attribute attrib, int value) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipFuncSetAttribute (hfunc, attrib, value); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipFuncSetAttribute(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipFuncSetAttribute(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipStreamCreate (hashcat_ctx_t *hashcat_ctx, HIPstream *phStream, unsigned int 
Flags) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipStreamCreate (phStream, Flags); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipStreamCreate(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipStreamCreate(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipStreamDestroy (hashcat_ctx_t *hashcat_ctx, HIPstream hStream) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipStreamDestroy (hStream); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipStreamDestroy(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipStreamDestroy(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipStreamSynchronize (hashcat_ctx_t *hashcat_ctx, HIPstream hStream) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipStreamSynchronize (hStream); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipStreamSynchronize(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipStreamSynchronize(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipLaunchKernel (hashcat_ctx_t *hashcat_ctx, HIPfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, HIPstream hStream, void **kernelParams, void **extra) +{ + backend_ctx_t *backend_ctx = 
hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipLaunchKernel (f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipLaunchKernel(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipLaunchKernel(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipCtxSynchronize (hashcat_ctx_t *hashcat_ctx) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipCtxSynchronize (); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipCtxSynchronize(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipCtxSynchronize(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipEventCreate (hashcat_ctx_t *hashcat_ctx, HIPevent *phEvent, unsigned int Flags) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipEventCreate (phEvent, Flags); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipEventCreate(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipEventCreate(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipEventDestroy (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipEventDestroy (hEvent); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if 
(hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipEventDestroy(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipEventDestroy(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipEventElapsedTime (hashcat_ctx_t *hashcat_ctx, float *pMilliseconds, HIPevent hStart, HIPevent hEnd) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipEventElapsedTime (pMilliseconds, hStart, hEnd); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipEventElapsedTime(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipEventElapsedTime(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipEventQuery (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipEventQuery (hEvent); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipEventQuery(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipEventQuery(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipEventRecord (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent, HIPstream hStream) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipEventRecord (hEvent, hStream); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipEventRecord(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipEventRecord(): %d", HIP_err); + } + + return -1; + } + 
+ return 0; +} + +int hc_hipEventSynchronize (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipEventSynchronize (hEvent); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipEventSynchronize(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipEventSynchronize(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipCtxSetCacheConfig (hashcat_ctx_t *hashcat_ctx, HIPfunc_cache config) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipCtxSetCacheConfig (config); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipCtxSetCacheConfig(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipCtxSetCacheConfig(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipCtxPushCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipCtxPushCurrent (ctx); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipCtxPushCurrent(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipCtxPushCurrent(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipCtxPopCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext *pctx) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipCtxPopCurrent (pctx); + + if (HIP_err != 
HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipCtxPopCurrent(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipCtxPopCurrent(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipLinkCreate (hashcat_ctx_t *hashcat_ctx, unsigned int numOptions, HIPjit_option *options, void **optionValues, HIPlinkState *stateOut) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipLinkCreate (numOptions, options, optionValues, stateOut); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipLinkCreate(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipLinkCreate(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipLinkAddData (hashcat_ctx_t *hashcat_ctx, HIPlinkState state, HIPjitInputType type, void *data, size_t size, const char *name, unsigned int numOptions, HIPjit_option *options, void **optionValues) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipLinkAddData (state, type, data, size, name, numOptions, options, optionValues); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipLinkAddData(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipLinkAddData(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipLinkDestroy (hashcat_ctx_t *hashcat_ctx, HIPlinkState state) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipLinkDestroy (state); + + if (HIP_err != HIP_SUCCESS) + { + 
const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipLinkDestroy(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipLinkDestroy(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipLinkComplete (hashcat_ctx_t *hashcat_ctx, HIPlinkState state, void **hipbinOut, size_t *sizeOut) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const HIPresult HIP_err = hip->hipLinkComplete (state, hipbinOut, sizeOut); + + if (HIP_err != HIP_SUCCESS) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + { + event_log_error (hashcat_ctx, "hipLinkComplete(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipLinkComplete(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + // OpenCL int ocl_init (hashcat_ctx_t *hashcat_ctx) @@ -2916,6 +4341,15 @@ int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, c if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return -1; + + if (hc_hipMemcpyDtoH (hashcat_ctx, &pw_idx, device_param->hip_d_pws_idx + (gidd * sizeof (pw_idx_t)), sizeof (pw_idx_t)) == -1) return -1; + + if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, gidd * sizeof (pw_idx_t), sizeof (pw_idx_t), &pw_idx, 0, NULL, NULL) == -1) return -1; @@ -2937,6 +4371,18 @@ int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, c } } + if (device_param->is_hip == true) + { + if (cnt > 0) + { + if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return -1; + 
+ if (hc_hipMemcpyDtoH (hashcat_ctx,pw->i, device_param->hip_d_pws_comp_buf + (off * sizeof (u32)), cnt * sizeof (u32)) == -1) return -1; + + if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return -1; + } + } + if (device_param->is_opencl == true) { if (cnt > 0) @@ -2986,6 +4432,11 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_tm_c, size_tm) == -1) return -1; } + if (device_param->is_hip == true) + { + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_tm_c, size_tm) == -1) return -1; + } + if (device_param->is_opencl == true) { if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_tm_c, size_tm) == -1) return -1; @@ -2998,6 +4449,11 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_bfs_c, device_param->cuda_d_tm_c, size_tm) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_bfs_c, device_param->hip_d_tm_c, size_tm) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_tm_c, device_param->opencl_d_bfs_c, 0, 0, size_tm, 0, NULL, NULL) == -1) return -1; @@ -3059,6 +4515,11 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_pws_buf, device_param->cuda_d_pws_amp_buf, pws_cnt * sizeof (pw_t)) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_pws_buf, device_param->hip_d_pws_amp_buf, pws_cnt * sizeof (pw_t)) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_amp_buf, 
device_param->opencl_d_pws_buf, 0, 0, pws_cnt * sizeof (pw_t), 0, NULL, NULL) == -1) return -1; @@ -3079,6 +4540,11 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (run_cuda_kernel_utf8toutf16le (hashcat_ctx, device_param, device_param->cuda_d_pws_buf, pws_cnt) == -1) return -1; } + if (device_param->is_hip == true) + { + if (run_hip_kernel_utf8toutf16le (hashcat_ctx, device_param, device_param->hip_d_pws_buf, pws_cnt) == -1) return -1; + } + if (device_param->is_opencl == true) { if (run_opencl_kernel_utf8toutf16le (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, pws_cnt) == -1) return -1; @@ -3096,6 +4562,11 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (hc_cuMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->cuda_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->hip_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, pws_cnt * hashconfig->hook_size, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1; @@ -3145,6 +4616,11 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_hooks, device_param->hooks_buf, pws_cnt * hashconfig->hook_size) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_hooks, device_param->hooks_buf, pws_cnt * hashconfig->hook_size) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, pws_cnt * hashconfig->hook_size, device_param->hooks_buf, 0, NULL, NULL) == -1) return 
-1; @@ -3234,6 +4710,11 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (hc_cuMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->cuda_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->hip_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, pws_cnt * hashconfig->hook_size, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1; @@ -3283,6 +4764,11 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_hooks, device_param->hooks_buf, pws_cnt * hashconfig->hook_size) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_hooks, device_param->hooks_buf, pws_cnt * hashconfig->hook_size) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, pws_cnt * hashconfig->hook_size, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1; @@ -3390,6 +4876,11 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_tmps, device_param->size_tmps) == -1) return -1; } + if (device_param->is_hip == true) + { + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_tmps, device_param->size_tmps) == -1) return -1; + } + if (device_param->is_opencl == true) { if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_tmps, device_param->size_tmps) == -1) return -1; @@ -3403,6 +4894,11 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, 
hc_device_param_t *device_param, if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1; } + if (device_param->is_hip == true) + { + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1; + } + if (device_param->is_opencl == true) { if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1; @@ -3557,6 +5053,99 @@ int run_cuda_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device return run_cuda_kernel_memset (hashcat_ctx, device_param, buf, 0, size); } +int run_hip_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 num) +{ + u64 num_elements = num; + + device_param->kernel_params_atinit[0] = (void *) &buf; + device_param->kernel_params_atinit_buf64[1] = num_elements; + + const u64 kernel_threads = device_param->kernel_wgs_atinit; + + num_elements = CEILDIV (num_elements, kernel_threads); + + HIPfunction function = device_param->hip_function_atinit; + + if (hc_hipLaunchKernel (hashcat_ctx, function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_atinit, NULL) == -1) return -1; + + if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1; + + return 0; +} + +int run_hip_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 num) +{ + u64 num_elements = num; + + device_param->kernel_params_utf8toutf16le[0] = (void *) &buf; + device_param->kernel_params_utf8toutf16le_buf64[1] = num_elements; + + const u64 kernel_threads = device_param->kernel_wgs_utf8toutf16le; + + num_elements = CEILDIV (num_elements, kernel_threads); + + HIPfunction function = device_param->hip_function_utf8toutf16le; + + if (hc_hipLaunchKernel (hashcat_ctx, function, 
num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_utf8toutf16le, NULL) == -1) return -1; + + if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1; + + return 0; +} + +int run_hip_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u32 value, const u64 size) +{ + const u64 num16d = size / 16; + const u64 num16m = size % 16; + + if (num16d) + { + device_param->kernel_params_memset[0] = (void *) &buf; + device_param->kernel_params_memset_buf32[1] = value; + device_param->kernel_params_memset_buf64[2] = num16d; + + const u64 kernel_threads = device_param->kernel_wgs_memset; + + u64 num_elements = num16d; + + num_elements = CEILDIV (num_elements, kernel_threads); + + HIPfunction function = device_param->hip_function_memset; + + //HIP_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 0, sizeof (cl_mem), (void *) &buf); if (HIP_rc == -1) return -1; + //HIP_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 1, sizeof (cl_uint), device_param->kernel_params_memset[1]); if (HIP_rc == -1) return -1; + //HIP_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 2, sizeof (cl_ulong), device_param->kernel_params_memset[2]); if (HIP_rc == -1) return -1; + + //const size_t global_work_size[3] = { num_elements, 1, 1 }; + //const size_t local_work_size[3] = { kernel_threads, 1, 1 }; + + if (hc_hipLaunchKernel (hashcat_ctx, function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_memset, NULL) == -1) return -1; + + if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1; + } + + if (num16m) + { + u32 tmp[4]; + + tmp[0] = value; + tmp[1] = value; + tmp[2] = value; + tmp[3] = value; + + // Apparently are allowed to do this: https://devtalk.nvidia.com/default/topic/761515/how-to-copy-to-device-memory-with-offset-/ + + if (hc_hipMemcpyHtoD (hashcat_ctx, buf + (num16d * 16), tmp, num16m) == -1) return 
-1; + } + + return 0; +} + +int run_hip_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 size) +{ + return run_hip_kernel_memset (hashcat_ctx, device_param, buf, 0, size); +} + int run_opencl_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num) { u64 num_elements = num; @@ -3860,6 +5449,105 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con } } + if (device_param->is_hip == true) + { + HIPfunction hip_function = NULL; + + if (device_param->is_hip == true) + { + switch (kern_run) + { + case KERN_RUN_1: hip_function = device_param->hip_function1; break; + case KERN_RUN_12: hip_function = device_param->hip_function12; break; + case KERN_RUN_2P: hip_function = device_param->hip_function2p; break; + case KERN_RUN_2: hip_function = device_param->hip_function2; break; + case KERN_RUN_2E: hip_function = device_param->hip_function2e; break; + case KERN_RUN_23: hip_function = device_param->hip_function23; break; + case KERN_RUN_3: hip_function = device_param->hip_function3; break; + case KERN_RUN_4: hip_function = device_param->hip_function4; break; + case KERN_RUN_INIT2: hip_function = device_param->hip_function_init2; break; + case KERN_RUN_LOOP2P: hip_function = device_param->hip_function_loop2p; break; + case KERN_RUN_LOOP2: hip_function = device_param->hip_function_loop2; break; + case KERN_RUN_AUX1: hip_function = device_param->hip_function_aux1; break; + case KERN_RUN_AUX2: hip_function = device_param->hip_function_aux2; break; + case KERN_RUN_AUX3: hip_function = device_param->hip_function_aux3; break; + case KERN_RUN_AUX4: hip_function = device_param->hip_function_aux4; break; + } + + if (hc_hipFuncSetAttribute (hashcat_ctx, hip_function, HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, dynamic_shared_mem) == -1) return -1; + } + + if (kernel_threads == 0) kernel_threads = 1; + + num_elements = CEILDIV (num_elements, 
kernel_threads); + + if (kern_run == KERN_RUN_1) + { + if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_INIT) + { + num_elements = CEILDIV (num_elements, device_param->vector_width); + } + } + else if (kern_run == KERN_RUN_2) + { + if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_LOOP) + { + num_elements = CEILDIV (num_elements, device_param->vector_width); + } + } + else if (kern_run == KERN_RUN_3) + { + if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_COMP) + { + num_elements = CEILDIV (num_elements, device_param->vector_width); + } + } + else if (kern_run == KERN_RUN_INIT2) + { + if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_INIT2) + { + num_elements = CEILDIV (num_elements, device_param->vector_width); + } + } + else if (kern_run == KERN_RUN_LOOP2) + { + if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_LOOP2) + { + num_elements = CEILDIV (num_elements, device_param->vector_width); + } + } + + if (hc_hipEventRecord (hashcat_ctx, device_param->hip_event1, device_param->hip_stream) == -1) return -1; + + if (hc_hipLaunchKernel (hashcat_ctx, hip_function, num_elements, 1, 1, kernel_threads, 1, 1, dynamic_shared_mem, device_param->hip_stream, device_param->kernel_params, NULL) == -1) return -1; + + if (hc_hipEventRecord (hashcat_ctx, device_param->hip_event2, device_param->hip_stream) == -1) return -1; + + if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1; + + if (hc_hipEventSynchronize (hashcat_ctx, device_param->hip_event2) == -1) return -1; + + float exec_ms; + + if (hc_hipEventElapsedTime (hashcat_ctx, &exec_ms, device_param->hip_event1, device_param->hip_event2) == -1) return -1; + + if (event_update) + { + u32 exec_pos = device_param->exec_pos; + + device_param->exec_msec[exec_pos] = exec_ms; + + exec_pos++; + + if (exec_pos == EXEC_CACHE) + { + exec_pos = 0; + } + + device_param->exec_pos = exec_pos; + } + } + if (device_param->is_opencl == true) { cl_kernel opencl_kernel = NULL; @@ -4089,6 +5777,32 @@ 
int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1; } + if (device_param->is_hip == true) + { + HIPfunction hip_function = NULL; + + void **hip_args = NULL; + + switch (kern_run) + { + case KERN_RUN_MP: hip_function = device_param->hip_function_mp; + hip_args = device_param->kernel_params_mp; + break; + case KERN_RUN_MP_R: hip_function = device_param->hip_function_mp_r; + hip_args = device_param->kernel_params_mp_r; + break; + case KERN_RUN_MP_L: hip_function = device_param->hip_function_mp_l; + hip_args = device_param->kernel_params_mp_l; + break; + } + + num_elements = CEILDIV (num_elements, kernel_threads); + + if (hc_hipLaunchKernel (hashcat_ctx, hip_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, hip_args, NULL) == -1) return -1; + + if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1; + } + if (device_param->is_opencl == true) { cl_kernel opencl_kernel = NULL; @@ -4156,6 +5870,15 @@ int run_kernel_tm (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param) if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1; } + if (device_param->is_hip == true) + { + HIPfunction hip_function = device_param->hip_function_tm; + + if (hc_hipLaunchKernel (hashcat_ctx, hip_function, num_elements / kernel_threads, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_tm, NULL) == -1) return -1; + + if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1; + } + if (device_param->is_opencl == true) { cl_kernel cuda_kernel = device_param->opencl_kernel_tm; @@ -4192,6 +5915,17 @@ int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1; } + if (device_param->is_hip == true) + { + num_elements = CEILDIV 
(num_elements, kernel_threads); + + HIPfunction hip_function = device_param->hip_function_amp; + + if (hc_hipLaunchKernel (hashcat_ctx, hip_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_amp, NULL) == -1) return -1; + + if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1; + } + if (device_param->is_opencl == true) { num_elements = round_up_multiple_64 (num_elements, kernel_threads); @@ -4232,6 +5966,17 @@ int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1; } + if (device_param->is_hip == true) + { + num_elements = CEILDIV (num_elements, kernel_threads); + + HIPfunction hip_function = device_param->hip_function_decompress; + + if (hc_hipLaunchKernel (hashcat_ctx, hip_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_decompress, NULL) == -1) return -1; + + if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1; + } + if (device_param->is_opencl == true) { num_elements = round_up_multiple_64 (num_elements, kernel_threads); @@ -4290,6 +6035,20 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const } } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1; + + const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt; + + const u32 off = pw_idx->off; + + if (off) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32)) == -1) return -1; + } + } + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) 
== -1) return -1; @@ -4324,6 +6083,20 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const } } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1; + + const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt; + + const u32 off = pw_idx->off; + + if (off) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32)) == -1) return -1; + } + } + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1; @@ -4392,6 +6165,20 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const } } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1; + + const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt; + + const u32 off = pw_idx->off; + + if (off) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32)) == -1) return -1; + } + } + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1; @@ -4426,6 +6213,20 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const } } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1; + + const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt; + + const u32 off = pw_idx->off; + + if (off) + { + if 
(hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32)) == -1) return -1; + } + } + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1; @@ -4458,6 +6259,20 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const } } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1; + + const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt; + + const u32 off = pw_idx->off; + + if (off) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32)) == -1) return -1; + } + } + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1; @@ -4679,6 +6494,11 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co if (hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_rules_c, device_param->cuda_d_rules + (innerloop_pos * sizeof (kernel_rule_t)), innerloop_left * sizeof (kernel_rule_t)) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_rules_c, device_param->hip_d_rules + (innerloop_pos * sizeof (kernel_rule_t)), innerloop_left * sizeof (kernel_rule_t)) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_rules, device_param->opencl_d_rules_c, innerloop_pos * sizeof (kernel_rule_t), 0, innerloop_left * sizeof (kernel_rule_t), 0, NULL, NULL) == -1) 
return -1; @@ -4799,6 +6619,11 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_combs_c, device_param->combs_buf, innerloop_left * sizeof (pw_t)) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_combs_c, device_param->combs_buf, innerloop_left * sizeof (pw_t)) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_TRUE, 0, innerloop_left * sizeof (pw_t), device_param->combs_buf, 0, NULL, NULL) == -1) return -1; @@ -4817,6 +6642,11 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co if (hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_combs_c, device_param->cuda_d_combs, innerloop_left * sizeof (pw_t)) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_combs_c, device_param->hip_d_combs, innerloop_left * sizeof (pw_t)) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs, device_param->opencl_d_combs_c, 0, 0, innerloop_left * sizeof (pw_t), 0, NULL, NULL) == -1) return -1; @@ -4835,6 +6665,11 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co if (hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_combs_c, device_param->cuda_d_combs, innerloop_left * sizeof (pw_t)) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_combs_c, device_param->hip_d_combs, innerloop_left * sizeof (pw_t)) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs, device_param->opencl_d_combs_c, 0, 
0, innerloop_left * sizeof (pw_t), 0, NULL, NULL) == -1) return -1; @@ -4956,6 +6791,11 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_combs_c, device_param->combs_buf, innerloop_left * sizeof (pw_t)) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_combs_c, device_param->combs_buf, innerloop_left * sizeof (pw_t)) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_TRUE, 0, innerloop_left * sizeof (pw_t), device_param->combs_buf, 0, NULL, NULL) == -1) return -1; @@ -4974,6 +6814,11 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co if (hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_combs_c, device_param->cuda_d_combs, innerloop_left * sizeof (pw_t)) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_combs_c, device_param->hip_d_combs, innerloop_left * sizeof (pw_t)) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs, device_param->opencl_d_combs_c, 0, 0, innerloop_left * sizeof (pw_t), 0, NULL, NULL) == -1) return -1; @@ -4994,6 +6839,11 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co if (hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_bfs_c, device_param->cuda_d_bfs, innerloop_left * sizeof (bf_t)) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_bfs_c, device_param->hip_d_bfs, innerloop_left * sizeof (bf_t)) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, 
device_param->opencl_d_bfs, device_param->opencl_d_bfs_c, 0, 0, innerloop_left * sizeof (bf_t), 0, NULL, NULL) == -1) return -1; @@ -5278,6 +7128,99 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx) } } + /** + * Load and map HIP library calls, then init HIP + */ + + int rc_hip_init = -1; + + if (user_options->backend_ignore_hip == false) + { + HIP_PTR *hip = (HIP_PTR *) hcmalloc (sizeof (HIP_PTR)); + + backend_ctx->hip = hip; + + rc_hip_init = hip_init (hashcat_ctx); + + if (rc_hip_init == -1) + { + backend_ctx->rc_hip_init = rc_hip_init; + + hip_close (hashcat_ctx); + } + + /** + * Load and map HIPRTC library calls + */ + + HIPRTC_PTR *hiprtc = (HIPRTC_PTR *) hcmalloc (sizeof (HIPRTC_PTR)); + + backend_ctx->hiprtc = hiprtc; + + int rc_hiprtc_init = hiprtc_init (hashcat_ctx); + + if (rc_hiprtc_init == -1) + { + backend_ctx->rc_hiprtc_init = rc_hiprtc_init; + + hiprtc_close (hashcat_ctx); + } + + /** + * Check if both HIP and HIPRTC were load successful + */ + + if ((rc_hip_init == 0) && (rc_hiprtc_init == 0)) + { + // hiprtc version + + int hiprtc_major = 0; + int hiprtc_minor = 0; + + if (hc_hiprtcVersion (hashcat_ctx, &hiprtc_major, &hiprtc_minor) == -1) return -1; + + int hiprtc_driver_version = (hiprtc_major * 1000) + (hiprtc_minor * 10); + + backend_ctx->hiprtc_driver_version = hiprtc_driver_version; + + if (hiprtc_driver_version < 9000) + { + event_log_error (hashcat_ctx, "Outdated AMD HIPRTC driver version '%d' detected!", hiprtc_driver_version); + + event_log_warning (hashcat_ctx, "See hashcat.net for officially supported AMD HIP versions."); + event_log_warning (hashcat_ctx, NULL); + + return -1; + } + + // hip version + + int hip_driver_version = 10000; + + //if (hc_hipDriverGetVersion (hashcat_ctx, &hip_driver_version) == -1) return -1; + + backend_ctx->hip_driver_version = hip_driver_version; + + if (hip_driver_version < 9000) + { + event_log_error (hashcat_ctx, "Outdated AMD HIP driver version '%d' detected!", hip_driver_version); + + 
event_log_warning (hashcat_ctx, "See hashcat.net for officially supported AMD HIP versions."); + event_log_warning (hashcat_ctx, NULL); + + return -1; + } + } + else + { + rc_hip_init = -1; + rc_hiprtc_init = -1; + + hip_close (hashcat_ctx); + hiprtc_close (hashcat_ctx); + } + } + /** * Load and map OpenCL library calls */ @@ -5301,11 +7244,11 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx) * return if both CUDA and OpenCL initialization failed */ - if ((rc_cuda_init == -1) && (rc_ocl_init == -1)) + if ((rc_cuda_init == -1) && (rc_hip_init == -1) && (rc_ocl_init == -1)) { - event_log_error (hashcat_ctx, "ATTENTION! No OpenCL or CUDA installation found."); + event_log_error (hashcat_ctx, "ATTENTION! No OpenCL, HIP or CUDA installation found."); - event_log_warning (hashcat_ctx, "You are probably missing the CUDA or OpenCL runtime installation."); + event_log_warning (hashcat_ctx, "You are probably missing the CUDA, HIP or OpenCL runtime installation."); event_log_warning (hashcat_ctx, NULL); #if defined (__linux__) @@ -5366,6 +7309,18 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx) } } + /** + * HIP API: init + */ + + if (backend_ctx->hip) + { + if (hc_hipInit (hashcat_ctx, 0) == -1) + { + hip_close (hashcat_ctx); + } + } + /** * OpenCL API: init */ @@ -5614,11 +7569,11 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx) * Final checks */ - if ((backend_ctx->cuda == NULL) && (backend_ctx->ocl == NULL)) + if ((backend_ctx->cuda == NULL) && (backend_ctx->hip == NULL) && (backend_ctx->ocl == NULL)) { - event_log_error (hashcat_ctx, "ATTENTION! No OpenCL-compatible or CUDA-compatible platform found."); + event_log_error (hashcat_ctx, "ATTENTION! 
No OpenCL-compatible, HIP-compatible or CUDA-compatible platform found."); - event_log_warning (hashcat_ctx, "You are probably missing the OpenCL or CUDA runtime installation."); + event_log_warning (hashcat_ctx, "You are probably missing the OpenCL, CUDA or HIP runtime installation."); event_log_warning (hashcat_ctx, NULL); #if defined (__linux__) @@ -5664,9 +7619,12 @@ void backend_ctx_destroy (hashcat_ctx_t *hashcat_ctx) hcfree (backend_ctx->opencl_platforms_version); } - nvrtc_close (hashcat_ctx); - cuda_close (hashcat_ctx); - ocl_close (hashcat_ctx); + nvrtc_close (hashcat_ctx); + hiprtc_close (hashcat_ctx); + + cuda_close (hashcat_ctx); + hip_close (hashcat_ctx); + ocl_close (hashcat_ctx); memset (backend_ctx, 0, sizeof (backend_ctx_t)); } @@ -5689,6 +7647,8 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) int backend_devices_idx = 0; + // CUDA + int cuda_devices_cnt = 0; int cuda_devices_active = 0; @@ -5725,10 +7685,11 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) device_param->cuda_device = cuda_device; - device_param->is_cuda = true; - + device_param->is_cuda = true; + device_param->is_hip = false; device_param->is_opencl = false; + device_param->use_opencl12 = false; device_param->use_opencl20 = false; device_param->use_opencl21 = false; @@ -6047,6 +8008,390 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) backend_ctx->cuda_devices_cnt = cuda_devices_cnt; backend_ctx->cuda_devices_active = cuda_devices_active; + // HIP + + int hip_devices_cnt = 0; + int hip_devices_active = 0; + + if (backend_ctx->hip) + { + // device count + + if (hc_hipDeviceGetCount (hashcat_ctx, &hip_devices_cnt) == -1) + { + hip_close (hashcat_ctx); + } + + backend_ctx->hip_devices_cnt = hip_devices_cnt; + + // device specific + + for (int hip_devices_idx = 0; hip_devices_idx < hip_devices_cnt; hip_devices_idx++, backend_devices_idx++) + { + const u32 device_id = backend_devices_idx; + 
+ hc_device_param_t *device_param = &devices_param[backend_devices_idx]; + + device_param->device_id = device_id; + + backend_ctx->backend_device_from_hip[hip_devices_idx] = backend_devices_idx; + + HIPdevice hip_device; + + if (hc_hipDeviceGet (hashcat_ctx, &hip_device, hip_devices_idx) == -1) + { + device_param->skipped = true; + continue; + } + + device_param->hip_device = hip_device; + + device_param->is_cuda = false; + device_param->is_hip = true; + device_param->is_opencl = false; + + device_param->use_opencl12 = false; + device_param->use_opencl20 = false; + device_param->use_opencl21 = false; + + // device_name + + char *device_name = (char *) hcmalloc (HCBUFSIZ_TINY); + + if (hc_hipDeviceGetName (hashcat_ctx, device_name, HCBUFSIZ_TINY, hip_device) == -1) + { + device_param->skipped = true; + hcfree (device_name); + continue; + } + + device_param->device_name = device_name; + + hc_string_trim_leading (device_name); + + hc_string_trim_trailing (device_name); + + // device_processors + + int device_processors = 0; + + if (hc_hipDeviceGetAttribute (hashcat_ctx, &device_processors, HIP_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, hip_device) == -1) + { + device_param->skipped = true; + continue; + } + + device_param->device_processors = device_processors; + + // device_global_mem, device_maxmem_alloc, device_available_mem + + size_t bytes = 0; + + if (hc_hipDeviceTotalMem (hashcat_ctx, &bytes, hip_device) == -1) + { + device_param->skipped = true; + continue; + } + + device_param->device_global_mem = (u64) bytes; + + device_param->device_maxmem_alloc = (u64) bytes; + + device_param->device_available_mem = 0; + + // warp size + + int hip_warp_size = 0; + + if (hc_hipDeviceGetAttribute (hashcat_ctx, &hip_warp_size, HIP_DEVICE_ATTRIBUTE_WARP_SIZE, hip_device) == -1) + { + device_param->skipped = true; + continue; + } + + device_param->hip_warp_size = hip_warp_size; + + // sm_minor, sm_major + + int sm_major = 0; + int sm_minor = 0; + + if (hc_hipDeviceGetAttribute 
(hashcat_ctx, &sm_major, HIP_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hip_device) == -1) + { + device_param->skipped = true; + continue; + } + + if (hc_hipDeviceGetAttribute (hashcat_ctx, &sm_minor, HIP_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, hip_device) == -1) + { + device_param->skipped = true; + continue; + } + + device_param->sm_major = sm_major; + device_param->sm_minor = sm_minor; + + // device_maxworkgroup_size + + int device_maxworkgroup_size = 0; + + if (hc_hipDeviceGetAttribute (hashcat_ctx, &device_maxworkgroup_size, HIP_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, hip_device) == -1) + { + device_param->skipped = true; + continue; + } + + device_param->device_maxworkgroup_size = device_maxworkgroup_size; + + // max_clock_frequency + + int device_maxclock_frequency = 0; + + if (hc_hipDeviceGetAttribute (hashcat_ctx, &device_maxclock_frequency, HIP_DEVICE_ATTRIBUTE_CLOCK_RATE, hip_device) == -1) + { + device_param->skipped = true; + continue; + } + + device_param->device_maxclock_frequency = device_maxclock_frequency / 1000; + + // pcie_bus, pcie_device, pcie_function + + int pci_domain_id_nv = 0; + int pci_bus_id_nv = 0; + int pci_slot_id_nv = 0; + + if (hc_hipDeviceGetAttribute (hashcat_ctx, &pci_domain_id_nv, HIP_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, hip_device) == -1) + { + device_param->skipped = true; + continue; + } + + if (hc_hipDeviceGetAttribute (hashcat_ctx, &pci_bus_id_nv, HIP_DEVICE_ATTRIBUTE_PCI_BUS_ID, hip_device) == -1) + { + device_param->skipped = true; + continue; + } + + if (hc_hipDeviceGetAttribute (hashcat_ctx, &pci_slot_id_nv, HIP_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, hip_device) == -1) + { + device_param->skipped = true; + continue; + } + + device_param->pcie_domain = (u8) (pci_domain_id_nv); + device_param->pcie_bus = (u8) (pci_bus_id_nv); + device_param->pcie_device = (u8) (pci_slot_id_nv >> 3); + device_param->pcie_function = (u8) (pci_slot_id_nv & 7); + + // kernel_exec_timeout + + int kernel_exec_timeout = 0; + + if 
(hc_hipDeviceGetAttribute (hashcat_ctx, &kernel_exec_timeout, HIP_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, hip_device) == -1) + { + device_param->skipped = true; + continue; + } + + device_param->kernel_exec_timeout = kernel_exec_timeout; + + // max_shared_memory_per_block + + int max_shared_memory_per_block = 0; + + if (hc_hipDeviceGetAttribute (hashcat_ctx, &max_shared_memory_per_block, HIP_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, hip_device) == -1) + { + device_param->skipped = true; + continue; + } + + if (max_shared_memory_per_block < 32768) + { + event_log_error (hashcat_ctx, "* Device #%u: This device's shared buffer size is too small.", device_id + 1); + + device_param->skipped = true; + } + + device_param->device_local_mem_size = max_shared_memory_per_block; + + // device_max_constant_buffer_size + + int device_max_constant_buffer_size = 0; + + if (hc_hipDeviceGetAttribute (hashcat_ctx, &device_max_constant_buffer_size, HIP_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, hip_device) == -1) + { + device_param->skipped = true; + continue; + } + + // TODO: broken on HIP? 
+ + device_max_constant_buffer_size = 65536; + + if (device_max_constant_buffer_size < 65536) + { + event_log_error (hashcat_ctx, "* Device #%u: This device's local mem size is too small.", device_id + 1); + + device_param->skipped = true; + } + + // some attributes have to be hardcoded values because they are used for instance in the build options + + device_param->device_local_mem_type = CL_LOCAL; + device_param->opencl_device_type = CL_DEVICE_TYPE_GPU; + device_param->opencl_device_vendor_id = VENDOR_ID_AMD_USE_HIP; + device_param->opencl_platform_vendor_id = VENDOR_ID_AMD_USE_HIP; + + // or in the cached kernel checksum + + device_param->opencl_device_version = ""; + device_param->opencl_driver_version = ""; + + // or just to make sure they are not NULL + + device_param->opencl_device_vendor = ""; + device_param->opencl_device_c_version = ""; + + // skipped + + if ((backend_ctx->backend_devices_filter & (1ULL << device_id)) == 0) + { + device_param->skipped = true; + } + + #if !defined (__APPLE__) + if ((backend_ctx->opencl_device_types_filter & CL_DEVICE_TYPE_GPU) == 0) + { + device_param->skipped = true; + } + #endif + + if ((device_param->opencl_platform_vendor_id == VENDOR_ID_NV) && (device_param->opencl_device_vendor_id == VENDOR_ID_NV)) + { + need_nvml = true; + + #if defined (_WIN) || defined (__CYGWIN__) + need_nvapi = true; + #endif + } + + // CPU burning loop damper + // Value is given as number between 0-100 + // By default 8% + // in theory not needed with HIP + + device_param->spin_damp = (double) user_options->spin_damp / 100; + + // common driver check + + if (device_param->skipped == false) + { + if ((user_options->force == false) && (user_options->backend_info == false)) + { + // HIPDA does not support query nvidia driver version, therefore no driver checks here + // IF needed, could be retrieved using nvmlSystemGetDriverVersion() + + if (device_param->sm_major < 5) + { + if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* 
Device #%u: This hardware has outdated HIPDA compute capability (%u.%u).", device_id + 1, device_param->sm_major, device_param->sm_minor); + if (user_options->quiet == false) event_log_warning (hashcat_ctx, " For modern OpenCL performance, upgrade to hardware that supports"); + if (user_options->quiet == false) event_log_warning (hashcat_ctx, " HIPDA compute capability version 5.0 (Maxwell) or higher."); + } + + if (device_param->kernel_exec_timeout != 0) + { + if (user_options->quiet == false) event_log_advice (hashcat_ctx, "* Device #%u: WARNING! Kernel exec timeout is not disabled.", device_id + 1); + if (user_options->quiet == false) event_log_advice (hashcat_ctx, " This may cause \"CL_OUT_OF_RESOURCES\" or related errors."); + if (user_options->quiet == false) event_log_advice (hashcat_ctx, " To disable the timeout, see: https://hashcat.net/q/timeoutpatch"); + } + } + + // activate device moved below, at end + } + + // instruction set + + // bcrypt optimization? + //const int rc_cuCtxSetCacheConfig = hc_hipCtxSetCacheConfig (hashcat_ctx, HIP_FUNC_CACHE_PREFER_SHARED); + // + //if (rc_cuCtxSetCacheConfig == -1) return -1; + + // const int sm = (device_param->sm_major * 10) + device_param->sm_minor; + + device_param->has_add = false; + device_param->has_addc = false; + device_param->has_sub = false; + device_param->has_subc = false; + device_param->has_bfe = false; + device_param->has_lop3 = false; + device_param->has_mov64 = false; + device_param->has_prmt = false; + + device_param->has_vadd = false; + device_param->has_vaddc = false; + device_param->has_vadd_co = false; + device_param->has_vaddc_co = false; + device_param->has_vsub = false; + device_param->has_vsubb = false; + device_param->has_vsub_co = false; + device_param->has_vsubb_co = false; + device_param->has_vadd3 = false; + device_param->has_vbfe = false; + device_param->has_vperm = false; + + // device_available_mem + + HIPcontext hip_context; + + if (hc_hipCtxCreate (hashcat_ctx, &hip_context, 
HIP_CTX_SCHED_BLOCKING_SYNC, device_param->hip_device) == -1) + { + device_param->skipped = true; + continue; + } + + if (hc_hipCtxPushCurrent (hashcat_ctx, hip_context) == -1) + { + device_param->skipped = true; + continue; + } + + size_t free = 0; + size_t total = 0; + + if (hc_hipMemGetInfo (hashcat_ctx, &free, &total) == -1) + { + device_param->skipped = true; + continue; + } + + device_param->device_available_mem = (u64) free; + + if (hc_hipCtxPopCurrent (hashcat_ctx, &hip_context) == -1) + { + device_param->skipped = true; + continue; + } + + if (hc_hipCtxDestroy (hashcat_ctx, hip_context) == -1) + { + device_param->skipped = true; + continue; + } + + /** + * activate device + */ + + if (device_param->skipped == false) hip_devices_active++; + } + } + + backend_ctx->hip_devices_cnt = hip_devices_cnt; + backend_ctx->hip_devices_active = hip_devices_active; + + // OCL + int opencl_devices_cnt = 0; int opencl_devices_active = 0; @@ -6087,8 +8432,8 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) //device_param->opencl_platform = opencl_platform; - device_param->is_cuda = false; - + device_param->is_cuda = false; + device_param->is_hip = false; device_param->is_opencl = true; // store opencl platform i @@ -6829,7 +9174,6 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) device_param->spin_damp = (double) user_options->spin_damp / 100; - if (user_options->stdout_flag == false) { // recommend CUDA @@ -7065,12 +9409,12 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) // all devices combined go into backend_* variables - backend_ctx->backend_devices_cnt = cuda_devices_cnt + opencl_devices_cnt; - backend_ctx->backend_devices_active = cuda_devices_active + opencl_devices_active; + backend_ctx->backend_devices_cnt = cuda_devices_cnt + hip_devices_cnt + opencl_devices_cnt; + backend_ctx->backend_devices_active = cuda_devices_active + hip_devices_active + opencl_devices_active; 
// find duplicate devices - //if ((cuda_devices_cnt > 0) && (opencl_devices_cnt > 0)) + //if ((cuda_devices_cnt > 0) && (hip_devices_cnt > 0) && (opencl_devices_cnt > 0)) //{ // using force here enables both devices, which is the worst possible outcome // many users force by default, so this is not a good idea @@ -7171,6 +9515,11 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) */ } + if (backend_ctx->hip) + { + // TODO HIP + } + if (backend_ctx->ocl) { for (int backend_devices_cnt = 0; backend_devices_cnt < backend_ctx->backend_devices_cnt; backend_devices_cnt++) @@ -7449,6 +9798,8 @@ void backend_ctx_devices_destroy (hashcat_ctx_t *hashcat_ctx) backend_ctx->backend_devices_active = 0; backend_ctx->cuda_devices_cnt = 0; backend_ctx->cuda_devices_active = 0; + backend_ctx->hip_devices_cnt = 0; + backend_ctx->hip_devices_active = 0; backend_ctx->opencl_devices_cnt = 0; backend_ctx->opencl_devices_active = 0; @@ -7657,6 +10008,62 @@ static int get_cuda_kernel_dynamic_local_mem_size (hashcat_ctx_t *hashcat_ctx, C return 0; } +static int get_hip_kernel_wgs (hashcat_ctx_t *hashcat_ctx, HIPfunction function, u32 *result) +{ + int max_threads_per_block; + + if (hc_hipFuncGetAttribute (hashcat_ctx, &max_threads_per_block, HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, function) == -1) return -1; + + *result = (u32) max_threads_per_block; + + return 0; +} + +static int get_hip_kernel_local_mem_size (hashcat_ctx_t *hashcat_ctx, HIPfunction function, u64 *result) +{ + int shared_size_bytes; + + if (hc_hipFuncGetAttribute (hashcat_ctx, &shared_size_bytes, HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, function) == -1) return -1; + + *result = (u64) shared_size_bytes; + + return 0; +} + +static int get_hip_kernel_dynamic_local_mem_size (hashcat_ctx_t *hashcat_ctx, HIPfunction function, u64 *result) +{ + // AFAIK there's no way to query the maximum value for dynamic shared memory available (because it depends on kernel code).
+ // So brute force it: probe sizes 1..MAX_ASSUMED_SHARED through the raw hipFuncSetAttribute() pointer (bypassing the hashcat wrapper), keep the largest size accepted before the first failure, then reset the attribute to 0. + + #define MAX_ASSUMED_SHARED (1024 * 1024) + + u64 dynamic_shared_size_bytes = 0; + + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + for (int i = 1; i <= MAX_ASSUMED_SHARED; i++) + { + const HIPresult HIP_err = hip->hipFuncSetAttribute (function, HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, i); + + if (HIP_err == HIP_SUCCESS) + { + dynamic_shared_size_bytes = i; + + continue; + } + + break; + } + + *result = dynamic_shared_size_bytes; + + if (hc_hipFuncSetAttribute (hashcat_ctx, function, HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, 0) == -1) return -1; + + return 0; +} + static int get_opencl_kernel_wgs (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_kernel kernel, u32 *result) { size_t work_group_size = 0; @@ -7774,7 +10181,7 @@ static u32 get_kernel_threads (const hc_device_param_t *device_param) return kernel_threads; } -static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const char *kernel_name, char *source_file, char *cached_file, const char *build_options_buf, const bool cache_disable, cl_program *opencl_program, CUmodule *cuda_module) +static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const char *kernel_name, char *source_file, char *cached_file, const char *build_options_buf, const bool cache_disable, cl_program *opencl_program, CUmodule *cuda_module, HIPmodule *hip_module) { const hashconfig_t *hashconfig = hashcat_ctx->hashconfig; const folder_config_t *folder_config = hashcat_ctx->folder_config; @@ -8053,6 +10460,248 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p hcfree (binary); } + if (device_param->is_hip == true) + { + hiprtcProgram program; + + if (hc_hiprtcCreateProgram (hashcat_ctx, &program, kernel_sources[0], kernel_name, 0, NULL, NULL) == -1) return false; +
+ char **hiprtc_options = (char **) hccalloc (6 + strlen (build_options_buf) + 1, sizeof (char *)); // ... + + //hiprtc_options[0] = "--restrict"; + //hiprtc_options[1] = "--device-as-default-execution-space"; + //hiprtc_options[2] = "--gpu-architecture"; + + //hc_asprintf (&hiprtc_options[3], "compute_%d%d", device_param->sm_major, device_param->sm_minor); + + // TODO HIP + + hiprtc_options[0] = ""; + hiprtc_options[1] = ""; + hiprtc_options[2] = ""; + hiprtc_options[3] = ""; + + hiprtc_options[4] = "-I"; + hiprtc_options[5] = folder_config->cpath_real; + + char *hiprtc_options_string = hcstrdup (build_options_buf); + + const int num_options = 6 + hiprtc_make_options_array_from_string (hiprtc_options_string, hiprtc_options + 6); + + const int rc_hiprtcCompileProgram = hc_hiprtcCompileProgram (hashcat_ctx, program, num_options, (const char * const *) hiprtc_options); + + hcfree (hiprtc_options_string); + hcfree (hiprtc_options); + + size_t build_log_size = 0; + + hc_hiprtcGetProgramLogSize (hashcat_ctx, program, &build_log_size); + + #if defined (DEBUG) + if ((build_log_size > 1) || (rc_hiprtcCompileProgram == -1)) + #else + if (rc_hiprtcCompileProgram == -1) + #endif + { + char *build_log = (char *) hcmalloc (build_log_size + 1); + + if (hc_hiprtcGetProgramLog (hashcat_ctx, program, build_log) == -1) + { + hcfree (build_log); + + return false; + } + + build_log[build_log_size] = 0; + + puts (build_log); + + hcfree (build_log); + } + + if (rc_hiprtcCompileProgram == -1) + { + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s build failed.", device_param->device_id + 1, source_file); + + return false; + } + + size_t binary_size = 0; + + if (hc_hiprtcGetCodeSize (hashcat_ctx, program, &binary_size) == -1) return false; + + char *binary = (char *) hcmalloc (binary_size); + + if (hc_hiprtcGetCode (hashcat_ctx, program, binary) == -1) return false; + + if (hc_hiprtcDestroyProgram (hashcat_ctx, &program) == -1) return false; + + #define LOG_SIZE 8192 + + char 
*mod_info_log = (char *) hcmalloc (LOG_SIZE + 1); + char *mod_error_log = (char *) hcmalloc (LOG_SIZE + 1); + + int mod_cnt = 6; + + HIPjit_option mod_opts[7]; + void *mod_vals[7]; + + mod_opts[0] = HIP_JIT_TARGET_FROM_HIPCONTEXT; + mod_vals[0] = (void *) 0; + + mod_opts[1] = HIP_JIT_LOG_VERBOSE; + mod_vals[1] = (void *) 1; + + mod_opts[2] = HIP_JIT_INFO_LOG_BUFFER; + mod_vals[2] = (void *) mod_info_log; + + mod_opts[3] = HIP_JIT_INFO_LOG_BUFFER_SIZE_BYTES; + mod_vals[3] = (void *) LOG_SIZE; + + mod_opts[4] = HIP_JIT_ERROR_LOG_BUFFER; + mod_vals[4] = (void *) mod_error_log; + + mod_opts[5] = HIP_JIT_ERROR_LOG_BUFFER_SIZE_BYTES; + mod_vals[5] = (void *) LOG_SIZE; + + if (hashconfig->opti_type & OPTI_TYPE_REGISTER_LIMIT) + { + mod_opts[6] = HIP_JIT_MAX_REGISTERS; + mod_vals[6] = (void *) 128; + + mod_cnt++; + } + + #if defined (WITH_HIPBIN) + + char *jit_info_log = (char *) hcmalloc (LOG_SIZE + 1); + char *jit_error_log = (char *) hcmalloc (LOG_SIZE + 1); + + int jit_cnt = 6; + + HIPjit_option jit_opts[7]; + void *jit_vals[7]; + + jit_opts[0] = HIP_JIT_TARGET_FROM_HIPCONTEXT; + jit_vals[0] = (void *) 0; + + jit_opts[1] = HIP_JIT_LOG_VERBOSE; + jit_vals[1] = (void *) 1; + + jit_opts[2] = HIP_JIT_INFO_LOG_BUFFER; + jit_vals[2] = (void *) jit_info_log; + + jit_opts[3] = HIP_JIT_INFO_LOG_BUFFER_SIZE_BYTES; + jit_vals[3] = (void *) LOG_SIZE; + + jit_opts[4] = HIP_JIT_ERROR_LOG_BUFFER; + jit_vals[4] = (void *) jit_error_log; + + jit_opts[5] = HIP_JIT_ERROR_LOG_BUFFER_SIZE_BYTES; + jit_vals[5] = (void *) LOG_SIZE; + + if (hashconfig->opti_type & OPTI_TYPE_REGISTER_LIMIT) + { + jit_opts[6] = HIP_JIT_MAX_REGISTERS; + jit_vals[6] = (void *) 128; + + jit_cnt++; + } + + HIPlinkState state; + + if (hc_cuLinkCreate (hashcat_ctx, jit_cnt, jit_opts, jit_vals, &state) == -1) + { + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s link failed. 
Error Log:", device_param->device_id + 1, source_file); + event_log_error (hashcat_ctx, "%s", jit_error_log); + event_log_error (hashcat_ctx, NULL); + + return false; + } + + if (hc_cuLinkAddData (hashcat_ctx, state, HIP_JIT_INPUT_CODE, binary, binary_size, kernel_name, 0, NULL, NULL) == -1) + { + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s link failed. Error Log:", device_param->device_id + 1, source_file); + event_log_error (hashcat_ctx, "%s", jit_error_log); + event_log_error (hashcat_ctx, NULL); + + return false; + } + + void *cubin = NULL; + + size_t cubin_size = 0; + + if (hc_cuLinkComplete (hashcat_ctx, state, &cubin, &cubin_size) == -1) + { + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s link failed. Error Log:", device_param->device_id + 1, source_file); + event_log_error (hashcat_ctx, "%s", jit_error_log); + event_log_error (hashcat_ctx, NULL); + + return false; + } + + #if defined (DEBUG) + event_log_info (hashcat_ctx, "* Device #%u: Kernel %s link successful. Info Log:", device_param->device_id + 1, source_file); + event_log_info (hashcat_ctx, "%s", jit_info_log); + event_log_info (hashcat_ctx, NULL); + #endif + + if (hc_cuModuleLoadDataEx (hashcat_ctx, hip_module, cubin, mod_cnt, mod_opts, mod_vals) == -1) + { + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. Error Log:", device_param->device_id + 1, source_file); + event_log_error (hashcat_ctx, "%s", mod_error_log); + event_log_error (hashcat_ctx, NULL); + + return false; + } + + #if defined (DEBUG) + event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful. 
Info Log:", device_param->device_id + 1, source_file); + event_log_info (hashcat_ctx, "%s", mod_info_log); + event_log_info (hashcat_ctx, NULL); + #endif + + if (cache_disable == false) + { + if (write_kernel_binary (hashcat_ctx, cached_file, cubin, cubin_size) == false) return false; + } + + if (hc_hipLinkDestroy (hashcat_ctx, state) == -1) return false; + + hcfree (jit_info_log); + hcfree (jit_error_log); + + #else + + if (hc_hipModuleLoadDataEx (hashcat_ctx, hip_module, binary, mod_cnt, mod_opts, mod_vals) == -1) + { + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. Error Log:", device_param->device_id + 1, source_file); + event_log_error (hashcat_ctx, "%s", mod_error_log); + event_log_error (hashcat_ctx, NULL); + + return false; + } + + #if defined (DEBUG) + event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful. Info Log:", device_param->device_id + 1, source_file); + event_log_info (hashcat_ctx, "%s", mod_info_log); + event_log_info (hashcat_ctx, NULL); + #endif + + if (cache_disable == false) + { + if (write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size) == false) return false; + } + + #endif + + hcfree (mod_info_log); + hcfree (mod_error_log); + + hcfree (binary); + } + if (device_param->is_opencl == true) { size_t build_log_size = 0; @@ -8185,6 +10834,63 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p hcfree (mod_error_log); } + if (device_param->is_hip == true) + { + #define LOG_SIZE 8192 + + char *mod_info_log = (char *) hcmalloc (LOG_SIZE + 1); + char *mod_error_log = (char *) hcmalloc (LOG_SIZE + 1); + + int mod_cnt = 6; + + HIPjit_option mod_opts[7]; + void *mod_vals[7]; + + mod_opts[0] = HIP_JIT_TARGET_FROM_HIPCONTEXT; + mod_vals[0] = (void *) 0; + + mod_opts[1] = HIP_JIT_LOG_VERBOSE; + mod_vals[1] = (void *) 1; + + mod_opts[2] = HIP_JIT_INFO_LOG_BUFFER; + mod_vals[2] = (void *) mod_info_log; + + mod_opts[3] = HIP_JIT_INFO_LOG_BUFFER_SIZE_BYTES; + mod_vals[3] = 
(void *) LOG_SIZE; + + mod_opts[4] = HIP_JIT_ERROR_LOG_BUFFER; + mod_vals[4] = (void *) mod_error_log; + + mod_opts[5] = HIP_JIT_ERROR_LOG_BUFFER_SIZE_BYTES; + mod_vals[5] = (void *) LOG_SIZE; + + if (hashconfig->opti_type & OPTI_TYPE_REGISTER_LIMIT) + { + mod_opts[6] = HIP_JIT_MAX_REGISTERS; + mod_vals[6] = (void *) 128; + + mod_cnt++; + } + + if (hc_hipModuleLoadDataEx (hashcat_ctx, hip_module, kernel_sources[0], mod_cnt, mod_opts, mod_vals) == -1) + { + event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. Error Log:", device_param->device_id + 1, source_file); + event_log_error (hashcat_ctx, "%s", mod_error_log); + event_log_error (hashcat_ctx, NULL); + + return false; + } + + #if defined (DEBUG) + event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful. Info Log:", device_param->device_id + 1, source_file); + event_log_info (hashcat_ctx, "%s", mod_info_log); + event_log_info (hashcat_ctx, NULL); + #endif + + hcfree (mod_info_log); + hcfree (mod_error_log); + } + if (device_param->is_opencl == true) { if (hc_clCreateProgramWithBinary (hashcat_ctx, device_param->opencl_context, 1, &device_param->opencl_device, kernel_lengths, (const unsigned char **) kernel_sources, NULL, opencl_program) == -1) return false; @@ -8243,7 +10949,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if ((unstable_warning == true) && (user_options->force == false)) { event_log_warning (hashcat_ctx, "* Device #%u: Skipping hash-mode %u)", device_id + 1, hashconfig->hash_mode); - event_log_warning (hashcat_ctx, " This is due to a known CUDA/OpenCL runtime/driver issue (not a hashcat issue)"); + event_log_warning (hashcat_ctx, " This is due to a known CUDA/HIP/OpenCL runtime/driver issue (not a hashcat issue)"); event_log_warning (hashcat_ctx, " You can use --force to override, but do not report related errors."); device_param->skipped_warning = true; @@ -8282,6 +10988,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) vector_width = 1; } + if 
(device_param->is_hip == true) + { + // hip does not support this query + + vector_width = 1; + } + if (device_param->is_opencl == true) { if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, sizeof (vector_width), &vector_width, NULL) == -1) @@ -8300,6 +11013,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) vector_width = 1; } + if (device_param->is_hip == true) + { + // hip does not support this query + + vector_width = 1; + } + if (device_param->is_opencl == true) { if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, sizeof (vector_width), &vector_width, NULL) == -1) @@ -8547,6 +11267,21 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) } } + if (device_param->is_hip == true) + { + if (hc_hipCtxCreate (hashcat_ctx, &device_param->hip_context, HIP_CTX_SCHED_BLOCKING_SYNC, device_param->hip_device) == -1) + { + device_param->skipped = true; + continue; + } + + if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) + { + device_param->skipped = true; + continue; + } + } + if (device_param->is_opencl == true) { /* @@ -8592,6 +11327,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) } } + /** + * create stream for HIP devices + */ + + if (device_param->is_hip == true) + { + if (hc_hipStreamCreate (hashcat_ctx, &device_param->hip_stream, HIP_STREAM_DEFAULT) == -1) + { + device_param->skipped = true; + continue; + } + } + /** * create events for CUDA devices */ @@ -8611,6 +11359,25 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) } } + /** + * create events for HIP devices + */ + + if (device_param->is_hip == true) + { + if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event1, HIP_EVENT_BLOCKING_SYNC) == -1) + { + device_param->skipped = true; + continue; + } + + if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event2, HIP_EVENT_BLOCKING_SYNC) == -1) + { + device_param->skipped = true; + continue; + } + } 
+ /** * create input buffers on device : calculate size of fixed memory buffers */ @@ -8726,7 +11493,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) int build_options_len = 0; - if (device_param->is_cuda == true) + if ((device_param->is_cuda == true) || (device_param->is_hip == true)) { // using a path with a space will break nvrtc_make_options_array_from_string() // we add it to options array in a clean way later @@ -8789,9 +11556,10 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) char device_name_chksum_amp_mp[HCBUFSIZ_TINY] = { 0 }; - const size_t dnclen_amp_mp = snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%d-%d-%d-%u-%s-%s-%s", + const size_t dnclen_amp_mp = snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s", backend_ctx->comptime, backend_ctx->cuda_driver_version, + backend_ctx->hip_driver_version, device_param->is_opencl, device_param->opencl_platform_vendor_id, device_param->device_name, @@ -8861,7 +11629,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) generate_cached_kernel_shared_filename (folder_config->cache_dir, device_name_chksum_amp_mp, cached_file); - const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "shared_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_shared, &device_param->cuda_module_shared); + const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "shared_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_shared, &device_param->cuda_module_shared, &device_param->hip_module_shared); if (rc_load_kernel == false) { @@ -8928,6 +11696,64 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->kernel_preferred_wgs_multiple_utf8toutf16le = device_param->cuda_warp_size; } + if (device_param->is_hip == true) + { + // GPU memset + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_memset, device_param->hip_module_shared, 
"gpu_memset") == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_memset, &device_param->kernel_wgs_memset) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_memset, &device_param->kernel_local_mem_size_memset) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_memset, &device_param->kernel_dynamic_local_mem_size_memset) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple_memset = device_param->hip_warp_size; + + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_memset, 0, sizeof (cl_mem), device_param->kernel_params_memset[0]); if (CL_rc == -1) return -1; + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_memset, 1, sizeof (cl_uint), device_param->kernel_params_memset[1]); if (CL_rc == -1) return -1; + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_memset, 2, sizeof (cl_ulong), device_param->kernel_params_memset[2]); if (CL_rc == -1) return -1; + + // GPU autotune init + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_atinit, device_param->hip_module_shared, "gpu_atinit") == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_atinit, &device_param->kernel_wgs_atinit) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_atinit, &device_param->kernel_local_mem_size_atinit) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_atinit, &device_param->kernel_dynamic_local_mem_size_atinit) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple_atinit = device_param->hip_warp_size; + + // CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_atinit, 0, sizeof (cl_mem), device_param->kernel_params_atinit[0]); if (CL_rc == -1) return -1; + // CL_rc = hc_clSetKernelArg (hashcat_ctx, 
device_param->opencl_kernel_atinit, 1, sizeof (cl_ulong), device_param->kernel_params_atinit[1]); if (CL_rc == -1) return -1; + + // GPU decompress + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_decompress, device_param->hip_module_shared, "gpu_decompress") == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_decompress, &device_param->kernel_wgs_decompress) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_decompress, &device_param->kernel_local_mem_size_decompress) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_decompress, &device_param->kernel_dynamic_local_mem_size_decompress) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple_decompress = device_param->hip_warp_size; + + // GPU utf8 to utf16le conversion + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_utf8toutf16le, device_param->hip_module_shared, "gpu_utf8_to_utf16") == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_utf8toutf16le, &device_param->kernel_wgs_utf8toutf16le) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_utf8toutf16le, &device_param->kernel_local_mem_size_utf8toutf16le) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_utf8toutf16le, &device_param->kernel_dynamic_local_mem_size_utf8toutf16le) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple_utf8toutf16le = device_param->hip_warp_size; + } + if (device_param->is_opencl == true) { // GPU memset @@ -9029,9 +11855,10 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) const u32 extra_value = (user_options->attack_mode == ATTACK_MODE_ASSOCIATION) ? 
ATTACK_MODE_ASSOCIATION : ATTACK_MODE_NONE; - const size_t dnclen = snprintf (device_name_chksum, HCBUFSIZ_TINY, "%d-%d-%d-%u-%s-%s-%s-%d-%u-%u-%s", + const size_t dnclen = snprintf (device_name_chksum, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s-%d-%u-%u-%s", backend_ctx->comptime, backend_ctx->cuda_driver_version, + backend_ctx->hip_driver_version, device_param->is_opencl, device_param->opencl_platform_vendor_id, device_param->device_name, @@ -9077,7 +11904,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) * load kernel */ - const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "main_kernel", source_file, cached_file, build_options_module_buf, cache_disable, &device_param->opencl_program, &device_param->cuda_module); + const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "main_kernel", source_file, cached_file, build_options_module_buf, cache_disable, &device_param->opencl_program, &device_param->cuda_module, &device_param->hip_module); if (rc_load_kernel == false) { @@ -9123,7 +11950,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) generate_cached_kernel_mp_filename (hashconfig->opti_type, hashconfig->opts_type, folder_config->cache_dir, device_name_chksum_amp_mp, cached_file); - const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "mp_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_mp, &device_param->cuda_module_mp); + const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "mp_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_mp, &device_param->cuda_module_mp, &device_param->hip_module_mp); if (rc_load_kernel == false) { @@ -9172,7 +11999,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) generate_cached_kernel_amp_filename (user_options_extra->attack_kern, folder_config->cache_dir, device_name_chksum_amp_mp, cached_file); - const bool rc_load_kernel = load_kernel (hashcat_ctx, 
device_param, "amp_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_amp, &device_param->cuda_module_amp); + const bool rc_load_kernel = load_kernel (hashcat_ctx, device_param, "amp_kernel", source_file, cached_file, build_options_buf, cache_disable, &device_param->opencl_program_amp, &device_param->cuda_module_amp, &device_param->hip_module_amp); if (rc_load_kernel == false) { @@ -9354,6 +12181,116 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) } } + if (device_param->is_hip == true) + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s1_a, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s1_b, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s1_c, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s1_d, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s2_a, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s2_b, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s2_c, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bitmap_s2_d, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_plain_bufs, size_plains) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_digests_buf, size_digests) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_digests_shown, size_shown) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_salt_bufs, size_salts) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_result, size_results) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, 
&device_param->hip_d_extra0_buf, size_extra_buffer / 4) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_extra1_buf, size_extra_buffer / 4) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_extra2_buf, size_extra_buffer / 4) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_extra3_buf, size_extra_buffer / 4) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_st_digests_buf, size_st_digests) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_st_salts_buf, size_st_salts) == -1) return -1; + + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bitmap_s1_a, bitmap_ctx->bitmap_s1_a, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bitmap_s1_b, bitmap_ctx->bitmap_s1_b, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bitmap_s1_c, bitmap_ctx->bitmap_s1_c, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bitmap_s1_d, bitmap_ctx->bitmap_s1_d, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bitmap_s2_a, bitmap_ctx->bitmap_s2_a, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bitmap_s2_b, bitmap_ctx->bitmap_s2_b, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bitmap_s2_c, bitmap_ctx->bitmap_s2_c, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bitmap_s2_d, bitmap_ctx->bitmap_s2_d, bitmap_ctx->bitmap_size) == -1) return -1; + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_digests_buf, hashes->digests_buf, size_digests) == -1) return -1; + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_salt_bufs, hashes->salts_buf, size_salts) == -1) return -1; + + /** + * special buffers + */ + + if 
(user_options->slow_candidates == true) + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_rules_c, size_rules_c) == -1) return -1; + } + else + { + if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_rules, size_rules) == -1) return -1; + + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + { + size_t dummy = 0; + + if (hc_hipModuleGetGlobal (hashcat_ctx, &device_param->hip_d_rules_c, &dummy, device_param->hip_module, "generic_constant") == -1) return -1; + } + else + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_rules_c, size_rules_c) == -1) return -1; + } + + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_rules, straight_ctx->kernel_rules_buf, size_rules) == -1) return -1; + } + else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_combs, size_combs) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_combs_c, size_combs) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_root_css_buf, size_root_css) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_markov_css_buf, size_markov_css) == -1) return -1; + } + else if (user_options_extra->attack_kern == ATTACK_KERN_BF) + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_bfs, size_bfs) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_root_css_buf, size_root_css) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_markov_css_buf, size_markov_css) == -1) return -1; + + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + { + size_t dummy = 0; + + if (hc_hipModuleGetGlobal (hashcat_ctx, &device_param->hip_d_bfs_c, &dummy, device_param->hip_module, "generic_constant") == -1) return -1; + + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_tm_c, size_tm) == -1) return -1; + } + else + { + if (hc_hipMemAlloc 
(hashcat_ctx, &device_param->hip_d_bfs_c, size_bfs) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_tm_c, size_tm) == -1) return -1; + } + } + } + + if (size_esalts) + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_esalt_bufs, size_esalts) == -1) return -1; + + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_esalt_bufs, hashes->esalts_buf, size_esalts) == -1) return -1; + } + + if (hashconfig->st_hash != NULL) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_st_digests_buf, hashes->st_digests_buf, size_st_digests) == -1) return -1; + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_st_salts_buf, hashes->st_salts_buf, size_st_salts) == -1) return -1; + + if (size_esalts) + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_st_esalts_buf, size_st_esalts) == -1) return -1; + + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_st_esalts_buf, hashes->st_esalts_buf, size_st_esalts) == -1) return -1; + } + } + } + if (device_param->is_opencl == true) { if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, bitmap_ctx->bitmap_size, NULL, &device_param->opencl_d_bitmap_s1_a) == -1) return -1; @@ -9488,6 +12425,34 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->kernel_params[23] = &device_param->cuda_d_extra3_buf; } + if (device_param->is_hip == true) + { + device_param->kernel_params[ 0] = NULL; // &device_param->hip_d_pws_buf; + device_param->kernel_params[ 1] = &device_param->hip_d_rules_c; + device_param->kernel_params[ 2] = &device_param->hip_d_combs_c; + device_param->kernel_params[ 3] = &device_param->hip_d_bfs_c; + device_param->kernel_params[ 4] = NULL; // &device_param->hip_d_tmps; + device_param->kernel_params[ 5] = NULL; // &device_param->hip_d_hooks; + device_param->kernel_params[ 6] = &device_param->hip_d_bitmap_s1_a; + device_param->kernel_params[ 7] = &device_param->hip_d_bitmap_s1_b; + device_param->kernel_params[ 8] = 
&device_param->hip_d_bitmap_s1_c; + device_param->kernel_params[ 9] = &device_param->hip_d_bitmap_s1_d; + device_param->kernel_params[10] = &device_param->hip_d_bitmap_s2_a; + device_param->kernel_params[11] = &device_param->hip_d_bitmap_s2_b; + device_param->kernel_params[12] = &device_param->hip_d_bitmap_s2_c; + device_param->kernel_params[13] = &device_param->hip_d_bitmap_s2_d; + device_param->kernel_params[14] = &device_param->hip_d_plain_bufs; + device_param->kernel_params[15] = &device_param->hip_d_digests_buf; + device_param->kernel_params[16] = &device_param->hip_d_digests_shown; + device_param->kernel_params[17] = &device_param->hip_d_salt_bufs; + device_param->kernel_params[18] = &device_param->hip_d_esalt_bufs; + device_param->kernel_params[19] = &device_param->hip_d_result; + device_param->kernel_params[20] = &device_param->hip_d_extra0_buf; + device_param->kernel_params[21] = &device_param->hip_d_extra1_buf; + device_param->kernel_params[22] = &device_param->hip_d_extra2_buf; + device_param->kernel_params[23] = &device_param->hip_d_extra3_buf; + } + if (device_param->is_opencl == true) { device_param->kernel_params[ 0] = NULL; // &device_param->opencl_d_pws_buf; @@ -9549,6 +12514,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->kernel_params_mp[0] = &device_param->cuda_d_combs; } + if (device_param->is_hip == true) + { + device_param->kernel_params_mp[0] = &device_param->hip_d_combs; + } + if (device_param->is_opencl == true) { device_param->kernel_params_mp[0] = &device_param->opencl_d_combs; @@ -9563,6 +12533,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->kernel_params_mp[0] = &device_param->cuda_d_combs; } + if (device_param->is_hip == true) + { + device_param->kernel_params_mp[0] = &device_param->hip_d_combs; + } + if (device_param->is_opencl == true) { device_param->kernel_params_mp[0] = &device_param->opencl_d_combs; @@ -9582,6 +12557,12 @@ int backend_session_begin (hashcat_ctx_t 
*hashcat_ctx) device_param->kernel_params_mp[2] = &device_param->cuda_d_markov_css_buf; } + if (device_param->is_hip == true) + { + device_param->kernel_params_mp[1] = &device_param->hip_d_root_css_buf; + device_param->kernel_params_mp[2] = &device_param->hip_d_markov_css_buf; + } + if (device_param->is_opencl == true) { device_param->kernel_params_mp[1] = &device_param->opencl_d_root_css_buf; @@ -9606,12 +12587,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->kernel_params_mp_l[0] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) // ? &device_param->opencl_d_pws_buf // : &device_param->opencl_d_pws_amp_buf; + if (device_param->is_cuda == true) { device_param->kernel_params_mp_l[1] = &device_param->cuda_d_root_css_buf; device_param->kernel_params_mp_l[2] = &device_param->cuda_d_markov_css_buf; } + if (device_param->is_hip == true) + { + device_param->kernel_params_mp_l[1] = &device_param->hip_d_root_css_buf; + device_param->kernel_params_mp_l[2] = &device_param->hip_d_markov_css_buf; + } + if (device_param->is_opencl == true) { device_param->kernel_params_mp_l[1] = &device_param->opencl_d_root_css_buf; @@ -9640,6 +12628,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->kernel_params_mp_r[2] = &device_param->cuda_d_markov_css_buf; } + if (device_param->is_hip == true) + { + device_param->kernel_params_mp_r[0] = &device_param->hip_d_bfs; + device_param->kernel_params_mp_r[1] = &device_param->hip_d_root_css_buf; + device_param->kernel_params_mp_r[2] = &device_param->hip_d_markov_css_buf; + } + if (device_param->is_opencl == true) { device_param->kernel_params_mp_r[0] = &device_param->opencl_d_bfs; @@ -9666,6 +12661,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->kernel_params_amp[4] = &device_param->cuda_d_bfs_c; } + if (device_param->is_hip == true) + { + device_param->kernel_params_amp[0] = NULL; // &device_param->hip_d_pws_buf; + device_param->kernel_params_amp[1] = NULL; 
// &device_param->hip_d_pws_amp_buf; + device_param->kernel_params_amp[2] = &device_param->hip_d_rules_c; + device_param->kernel_params_amp[3] = &device_param->hip_d_combs_c; + device_param->kernel_params_amp[4] = &device_param->hip_d_bfs_c; + } + if (device_param->is_opencl == true) { device_param->kernel_params_amp[0] = NULL; // &device_param->opencl_d_pws_buf; @@ -9684,6 +12688,12 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->kernel_params_tm[1] = &device_param->cuda_d_tm_c; } + if (device_param->is_hip == true) + { + device_param->kernel_params_tm[0] = &device_param->hip_d_bfs_c; + device_param->kernel_params_tm[1] = &device_param->hip_d_tm_c; + } + if (device_param->is_opencl == true) { device_param->kernel_params_tm[0] = &device_param->opencl_d_bfs_c; @@ -9719,6 +12729,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) // : &device_param->cuda_d_pws_amp_buf; } + if (device_param->is_hip == true) + { + device_param->kernel_params_decompress[0] = NULL; // &device_param->hip_d_pws_idx; + device_param->kernel_params_decompress[1] = NULL; // &device_param->hip_d_pws_comp_buf; + device_param->kernel_params_decompress[2] = NULL; // (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + // ? 
&device_param->hip_d_pws_buf + // : &device_param->hip_d_pws_amp_buf; + } + if (device_param->is_opencl == true) { device_param->kernel_params_decompress[0] = NULL; // &device_param->opencl_d_pws_idx; @@ -10336,6 +13355,608 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) } } + if (device_param->is_hip == true) + { + char kernel_name[64] = { 0 }; + + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + { + if (hashconfig->opti_type & OPTI_TYPE_SINGLE_HASH) + { + if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) + { + // kernel1 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 4); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function1, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function1, &device_param->kernel_wgs1) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_local_mem_size1) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_dynamic_local_mem_size1) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple1 = device_param->hip_warp_size; + + // kernel2 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 8); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function2, &device_param->kernel_wgs2) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_local_mem_size2) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_dynamic_local_mem_size2) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple2 = device_param->hip_warp_size; + + // kernel3 + + snprintf 
(kernel_name, sizeof (kernel_name), "m%05u_s%02d", kern_type, 16); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function3, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function3, &device_param->kernel_wgs3) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_local_mem_size3) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_dynamic_local_mem_size3) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple3 = device_param->hip_warp_size; + } + else + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_sxx", kern_type); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function4, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function4, &device_param->kernel_wgs4) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function4, &device_param->kernel_local_mem_size4) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function4, &device_param->kernel_dynamic_local_mem_size4) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple4 = device_param->hip_warp_size; + } + } + else + { + if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) + { + // kernel1 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 4); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function1, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function1, &device_param->kernel_wgs1) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_local_mem_size1) == -1) return -1; + + if 
(get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_dynamic_local_mem_size1) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple1 = device_param->hip_warp_size; + + // kernel2 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 8); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function2, &device_param->kernel_wgs2) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_local_mem_size2) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_dynamic_local_mem_size2) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple2 = device_param->hip_warp_size; + + // kernel3 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_m%02d", kern_type, 16); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function3, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function3, &device_param->kernel_wgs3) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_local_mem_size3) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_dynamic_local_mem_size3) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple3 = device_param->hip_warp_size; + } + else + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_mxx", kern_type); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function4, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function4, &device_param->kernel_wgs4) == -1) return -1; + + 
if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function4, &device_param->kernel_local_mem_size4) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function4, &device_param->kernel_dynamic_local_mem_size4) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple4 = device_param->hip_warp_size; + } + } + + if (user_options->slow_candidates == true) + { + } + else + { + if (user_options->attack_mode == ATTACK_MODE_BF) + { + if (hashconfig->opts_type & OPTS_TYPE_TM_KERNEL) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_tm", kern_type); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_tm, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_tm, &device_param->kernel_wgs_tm) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_tm, &device_param->kernel_local_mem_size_tm) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_tm, &device_param->kernel_dynamic_local_mem_size_tm) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple_tm = device_param->hip_warp_size; + } + } + } + } + else + { + // kernel1 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_init", kern_type); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function1, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function1, &device_param->kernel_wgs1) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_local_mem_size1) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_dynamic_local_mem_size1) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple1 = device_param->hip_warp_size; + + 
// kernel2 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop", kern_type); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function2, &device_param->kernel_wgs2) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_local_mem_size2) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_dynamic_local_mem_size2) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple2 = device_param->hip_warp_size; + + // kernel3 + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_comp", kern_type); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function3, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function3, &device_param->kernel_wgs3) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_local_mem_size3) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_dynamic_local_mem_size3) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple3 = device_param->hip_warp_size; + + if (hashconfig->opts_type & OPTS_TYPE_LOOP_PREPARE) + { + // kernel2p + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop_prepare", kern_type); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2p, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function2p, &device_param->kernel_wgs2p) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2p, &device_param->kernel_local_mem_size2p) == -1) return -1; + + if 
(get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function2p, &device_param->kernel_dynamic_local_mem_size2p) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple2p = device_param->hip_warp_size; + } + + if (hashconfig->opts_type & OPTS_TYPE_LOOP_EXTENDED) + { + // kernel2e + + snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop_extended", kern_type); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function2e, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function2e, &device_param->kernel_wgs2e) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2e, &device_param->kernel_local_mem_size2e) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function2e, &device_param->kernel_dynamic_local_mem_size2e) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple2e = device_param->hip_warp_size; + } + + // kernel12 + + if (hashconfig->opts_type & OPTS_TYPE_HOOK12) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_hook12", kern_type); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function12, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function12, &device_param->kernel_wgs12) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function12, &device_param->kernel_local_mem_size12) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function12, &device_param->kernel_dynamic_local_mem_size12) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple12 = device_param->hip_warp_size; + } + + // kernel23 + + if (hashconfig->opts_type & OPTS_TYPE_HOOK23) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_hook23", kern_type); + + if (hc_hipModuleGetFunction (hashcat_ctx, 
&device_param->hip_function23, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function23, &device_param->kernel_wgs23) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function23, &device_param->kernel_local_mem_size23) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function23, &device_param->kernel_dynamic_local_mem_size23) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple23 = device_param->hip_warp_size; + } + + // init2 + + if (hashconfig->opts_type & OPTS_TYPE_INIT2) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_init2", kern_type); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_init2, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_init2, &device_param->kernel_wgs_init2) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_init2, &device_param->kernel_local_mem_size_init2) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_init2, &device_param->kernel_dynamic_local_mem_size_init2) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple_init2 = device_param->hip_warp_size; + } + + // loop2 prepare + + if (hashconfig->opts_type & OPTS_TYPE_LOOP2_PREPARE) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop2_prepare", kern_type); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_loop2p, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_loop2p, &device_param->kernel_wgs_loop2p) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_loop2p, &device_param->kernel_local_mem_size_loop2p) == -1) return -1; + + if 
(get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_loop2p, &device_param->kernel_dynamic_local_mem_size_loop2p) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple_loop2p = device_param->hip_warp_size; + } + + // loop2 + + if (hashconfig->opts_type & OPTS_TYPE_LOOP2) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_loop2", kern_type); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_loop2, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_loop2, &device_param->kernel_wgs_loop2) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_loop2, &device_param->kernel_local_mem_size_loop2) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_loop2, &device_param->kernel_dynamic_local_mem_size_loop2) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple_loop2 = device_param->hip_warp_size; + } + + // aux1 + + if (hashconfig->opts_type & OPTS_TYPE_AUX1) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux1", kern_type); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_aux1, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_aux1, &device_param->kernel_wgs_aux1) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_aux1, &device_param->kernel_local_mem_size_aux1) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_aux1, &device_param->kernel_dynamic_local_mem_size_aux1) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple_aux1 = device_param->hip_warp_size; + } + + // aux2 + + if (hashconfig->opts_type & OPTS_TYPE_AUX2) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux2", kern_type); + + if 
(hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_aux2, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_aux2, &device_param->kernel_wgs_aux2) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_aux2, &device_param->kernel_local_mem_size_aux2) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_aux2, &device_param->kernel_dynamic_local_mem_size_aux2) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple_aux2 = device_param->hip_warp_size; + } + + // aux3 + + if (hashconfig->opts_type & OPTS_TYPE_AUX3) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux3", kern_type); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_aux3, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_aux3, &device_param->kernel_wgs_aux3) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_aux3, &device_param->kernel_local_mem_size_aux3) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_aux3, &device_param->kernel_dynamic_local_mem_size_aux3) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple_aux3 = device_param->hip_warp_size; + } + + // aux4 + + if (hashconfig->opts_type & OPTS_TYPE_AUX4) + { + snprintf (kernel_name, sizeof (kernel_name), "m%05u_aux4", kern_type); + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_aux4, device_param->hip_module, kernel_name) == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_aux4, &device_param->kernel_wgs_aux4) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_aux4, &device_param->kernel_local_mem_size_aux4) == -1) return -1; + + if 
(get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_aux4, &device_param->kernel_dynamic_local_mem_size_aux4) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple_aux4 = device_param->hip_warp_size; + } + } + + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 0, sizeof (cl_mem), device_param->kernel_params_decompress[0]); if (CL_rc == -1) return -1; + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 1, sizeof (cl_mem), device_param->kernel_params_decompress[1]); if (CL_rc == -1) return -1; + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 2, sizeof (cl_mem), device_param->kernel_params_decompress[2]); if (CL_rc == -1) return -1; + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 3, sizeof (cl_ulong), device_param->kernel_params_decompress[3]); if (CL_rc == -1) return -1; + + // MP start + + if (user_options->slow_candidates == true) + { + } + else + { + if (user_options->attack_mode == ATTACK_MODE_BF) + { + // mp_l + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_mp_l, device_param->hip_module_mp, "l_markov") == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_mp_l, &device_param->kernel_wgs_mp_l) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_mp_l, &device_param->kernel_local_mem_size_mp_l) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_mp_l, &device_param->kernel_dynamic_local_mem_size_mp_l) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple_mp_l = device_param->hip_warp_size; + + // mp_r + + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_mp_r, device_param->hip_module_mp, "r_markov") == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_mp_r, 
&device_param->kernel_wgs_mp_r) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_mp_r, &device_param->kernel_local_mem_size_mp_r) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_mp_r, &device_param->kernel_dynamic_local_mem_size_mp_r) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple_mp_r = device_param->hip_warp_size; + + if (user_options->attack_mode == ATTACK_MODE_BF) + { + if (hashconfig->opts_type & OPTS_TYPE_TM_KERNEL) + { + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 0, sizeof (cl_mem), device_param->kernel_params_tm[0]); if (CL_rc == -1) return -1; + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_tm, 1, sizeof (cl_mem), device_param->kernel_params_tm[1]); if (CL_rc == -1) return -1; + } + } + } + else if (user_options->attack_mode == ATTACK_MODE_HYBRID1) + { + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_mp, device_param->hip_module_mp, "C_markov") == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_wgs_mp) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_local_mem_size_mp) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_dynamic_local_mem_size_mp) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple_mp = device_param->hip_warp_size; + } + else if (user_options->attack_mode == ATTACK_MODE_HYBRID2) + { + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_mp, device_param->hip_module_mp, "C_markov") == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_wgs_mp) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_mp, 
&device_param->kernel_local_mem_size_mp) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_dynamic_local_mem_size_mp) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple_mp = device_param->hip_warp_size; + } + } + + if (user_options->slow_candidates == true) + { + } + else + { + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + { + // nothing to do + } + else + { + if (hc_hipModuleGetFunction (hashcat_ctx, &device_param->hip_function_amp, device_param->hip_module_amp, "amp") == -1) return -1; + + if (get_hip_kernel_wgs (hashcat_ctx, device_param->hip_function_amp, &device_param->kernel_wgs_amp) == -1) return -1; + + if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_amp, &device_param->kernel_local_mem_size_amp) == -1) return -1; + + if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_amp, &device_param->kernel_dynamic_local_mem_size_amp) == -1) return -1; + + device_param->kernel_preferred_wgs_multiple_amp = device_param->hip_warp_size; + } + + /* + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + { + // nothing to do + } + else + { + for (u32 i = 0; i < 5; i++) + { + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_mem), device_param->kernel_params_amp[i]); + + //if (CL_rc == -1) return -1; + } + + for (u32 i = 5; i < 6; i++) + { + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_uint), device_param->kernel_params_amp[i]); + + //if (CL_rc == -1) return -1; + } + + for (u32 i = 6; i < 7; i++) + { + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, i, sizeof (cl_ulong), device_param->kernel_params_amp[i]); + + //if (CL_rc == -1) return -1; + } + } + */ + } + + // zero some data buffers + + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_plain_bufs, 
device_param->size_plains) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_digests_shown, device_param->size_shown) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_result, device_param->size_results) == -1) return -1; + + /** + * special buffers + */ + + if (user_options->slow_candidates == true) + { + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_rules_c, size_rules_c) == -1) return -1; + } + else + { + if (user_options_extra->attack_kern == ATTACK_KERN_STRAIGHT) + { + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_rules_c, size_rules_c) == -1) return -1; + } + else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI) + { + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_combs, size_combs) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_combs_c, size_combs) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_root_css_buf, size_root_css) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_markov_css_buf, size_markov_css) == -1) return -1; + } + else if (user_options_extra->attack_kern == ATTACK_KERN_BF) + { + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_bfs, size_bfs) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_bfs_c, size_bfs) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_tm_c, size_tm) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_root_css_buf, size_root_css) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_markov_css_buf, size_markov_css) == -1) return -1; + } + } + + if (user_options->slow_candidates == true) + { + } + else + { + if 
((user_options->attack_mode == ATTACK_MODE_HYBRID1) || (user_options->attack_mode == ATTACK_MODE_HYBRID2)) + { + /** + * prepare mp + */ + + if (user_options->attack_mode == ATTACK_MODE_HYBRID1) + { + device_param->kernel_params_mp_buf32[5] = 0; + device_param->kernel_params_mp_buf32[6] = 0; + device_param->kernel_params_mp_buf32[7] = 0; + + if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01) device_param->kernel_params_mp_buf32[5] = full01; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADD06) device_param->kernel_params_mp_buf32[5] = full06; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80) device_param->kernel_params_mp_buf32[5] = full80; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS14) device_param->kernel_params_mp_buf32[6] = 1; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS15) device_param->kernel_params_mp_buf32[7] = 1; + } + else if (user_options->attack_mode == ATTACK_MODE_HYBRID2) + { + device_param->kernel_params_mp_buf32[5] = 0; + device_param->kernel_params_mp_buf32[6] = 0; + device_param->kernel_params_mp_buf32[7] = 0; + } + + //for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, i, sizeof (cl_mem), device_param->kernel_params_mp[i]); if (CL_rc == -1) return -1; } + } + else if (user_options->attack_mode == ATTACK_MODE_BF) + { + /** + * prepare mp_r and mp_l + */ + + device_param->kernel_params_mp_l_buf32[6] = 0; + device_param->kernel_params_mp_l_buf32[7] = 0; + device_param->kernel_params_mp_l_buf32[8] = 0; + + if (hashconfig->opts_type & OPTS_TYPE_PT_ADD01) device_param->kernel_params_mp_l_buf32[6] = full01; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADD06) device_param->kernel_params_mp_l_buf32[6] = full06; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADD80) device_param->kernel_params_mp_l_buf32[6] = full80; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS14) device_param->kernel_params_mp_l_buf32[7] = 1; + if (hashconfig->opts_type & OPTS_TYPE_PT_ADDBITS15) 
device_param->kernel_params_mp_l_buf32[8] = 1; + + //for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, i, sizeof (cl_mem), device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; } + //for (u32 i = 0; i < 3; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_r, i, sizeof (cl_mem), device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; } + } + } + } + if (device_param->is_opencl == true) { // GPU memset @@ -11208,6 +14829,23 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_hooks, device_param->size_hooks) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_pws_buf, size_pws) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_pws_amp_buf, size_pws_amp) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_pws_comp_buf, size_pws_comp) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_pws_idx, size_pws_idx) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_tmps, size_tmps) == -1) return -1; + if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_hooks, size_hooks) == -1) return -1; + + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_pws_buf, device_param->size_pws) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_pws_amp_buf, device_param->size_pws_amp) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_pws_comp_buf, device_param->size_pws_comp) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_pws_idx, device_param->size_pws_idx) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_tmps, device_param->size_tmps) == -1) return -1; + if 
(run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_hooks, device_param->size_hooks) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_WRITE, size_pws, NULL, &device_param->opencl_d_pws_buf) == -1) return -1; @@ -11279,6 +14917,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->kernel_params[ 5] = &device_param->cuda_d_hooks; } + if (device_param->is_hip == true) + { + device_param->kernel_params[ 0] = &device_param->hip_d_pws_buf; + device_param->kernel_params[ 4] = &device_param->hip_d_tmps; + device_param->kernel_params[ 5] = &device_param->hip_d_hooks; + } + if (device_param->is_opencl == true) { device_param->kernel_params[ 0] = &device_param->opencl_d_pws_buf; @@ -11308,6 +14953,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, 0, sizeof (cl_mem), device_param->kernel_params_mp[0]); if (CL_rc == -1) return -1; } + if (device_param->is_hip == true) + { + device_param->kernel_params_mp[0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + ? &device_param->hip_d_pws_buf + : &device_param->hip_d_pws_amp_buf; + + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, 0, sizeof (cl_mem), device_param->kernel_params_mp[0]); if (CL_rc == -1) return -1; + } + if (device_param->is_opencl == true) { device_param->kernel_params_mp[0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) @@ -11330,6 +14984,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, 0, sizeof (cl_mem), device_param->kernel_params_mp_l[0]); if (CL_rc == -1) return -1; } + if (device_param->is_hip == true) + { + device_param->kernel_params_mp_l[0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + ? 
&device_param->hip_d_pws_buf + : &device_param->hip_d_pws_amp_buf; + + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, 0, sizeof (cl_mem), device_param->kernel_params_mp_l[0]); if (CL_rc == -1) return -1; + } + if (device_param->is_opencl == true) { device_param->kernel_params_mp_l[0] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) @@ -11355,6 +15018,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, 1, sizeof (cl_mem), device_param->kernel_params_amp[1]); if (CL_rc == -1) return -1; } + if (device_param->is_hip == true) + { + device_param->kernel_params_amp[0] = &device_param->hip_d_pws_buf; + device_param->kernel_params_amp[1] = &device_param->hip_d_pws_amp_buf; + + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, 0, sizeof (cl_mem), device_param->kernel_params_amp[0]); if (CL_rc == -1) return -1; + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_amp, 1, sizeof (cl_mem), device_param->kernel_params_amp[1]); if (CL_rc == -1) return -1; + } + if (device_param->is_opencl == true) { device_param->kernel_params_amp[0] = &device_param->opencl_d_pws_buf; @@ -11379,6 +15051,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 2, sizeof (cl_mem), device_param->kernel_params_decompress[2]); if (CL_rc == -1) return -1; } + if (device_param->is_hip == true) + { + device_param->kernel_params_decompress[0] = &device_param->hip_d_pws_idx; + device_param->kernel_params_decompress[1] = &device_param->hip_d_pws_comp_buf; + device_param->kernel_params_decompress[2] = (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) + ? 
&device_param->hip_d_pws_buf + : &device_param->hip_d_pws_amp_buf; + + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 0, sizeof (cl_mem), device_param->kernel_params_decompress[0]); if (CL_rc == -1) return -1; + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 1, sizeof (cl_mem), device_param->kernel_params_decompress[1]); if (CL_rc == -1) return -1; + //CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 2, sizeof (cl_mem), device_param->kernel_params_decompress[2]); if (CL_rc == -1) return -1; + } + if (device_param->is_opencl == true) { device_param->kernel_params_decompress[0] = &device_param->opencl_d_pws_idx; @@ -11392,6 +15077,17 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_decompress, 2, sizeof (cl_mem), device_param->kernel_params_decompress[2]) == -1) return -1; } + // context + + if (device_param->is_hip == true) + { + if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) + { + device_param->skipped = true; + continue; + } + } + hardware_power_all += device_param->hardware_power; EVENT_DATA (EVENT_BACKEND_DEVICE_INIT_POST, &backend_devices_idx, sizeof (int)); @@ -11554,6 +15250,127 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx) device_param->cuda_context = NULL; } + if (device_param->is_hip == true) + { + if (device_param->hip_d_pws_buf) hc_hipMemFree (hashcat_ctx, device_param->hip_d_pws_buf); + if (device_param->hip_d_pws_amp_buf) hc_hipMemFree (hashcat_ctx, device_param->hip_d_pws_amp_buf); + if (device_param->hip_d_pws_comp_buf) hc_hipMemFree (hashcat_ctx, device_param->hip_d_pws_comp_buf); + if (device_param->hip_d_pws_idx) hc_hipMemFree (hashcat_ctx, device_param->hip_d_pws_idx); + if (device_param->hip_d_rules) hc_hipMemFree (hashcat_ctx, device_param->hip_d_rules); + //if (device_param->hip_d_rules_c) hc_hipMemFree (hashcat_ctx, 
device_param->hip_d_rules_c); + if (device_param->hip_d_combs) hc_hipMemFree (hashcat_ctx, device_param->hip_d_combs); + if (device_param->hip_d_combs_c) hc_hipMemFree (hashcat_ctx, device_param->hip_d_combs_c); + if (device_param->hip_d_bfs) hc_hipMemFree (hashcat_ctx, device_param->hip_d_bfs); + //if (device_param->hip_d_bfs_c) hc_hipMemFree (hashcat_ctx, device_param->hip_d_bfs_c); + if (device_param->hip_d_bitmap_s1_a) hc_hipMemFree (hashcat_ctx, device_param->hip_d_bitmap_s1_a); + if (device_param->hip_d_bitmap_s1_b) hc_hipMemFree (hashcat_ctx, device_param->hip_d_bitmap_s1_b); + if (device_param->hip_d_bitmap_s1_c) hc_hipMemFree (hashcat_ctx, device_param->hip_d_bitmap_s1_c); + if (device_param->hip_d_bitmap_s1_d) hc_hipMemFree (hashcat_ctx, device_param->hip_d_bitmap_s1_d); + if (device_param->hip_d_bitmap_s2_a) hc_hipMemFree (hashcat_ctx, device_param->hip_d_bitmap_s2_a); + if (device_param->hip_d_bitmap_s2_b) hc_hipMemFree (hashcat_ctx, device_param->hip_d_bitmap_s2_b); + if (device_param->hip_d_bitmap_s2_c) hc_hipMemFree (hashcat_ctx, device_param->hip_d_bitmap_s2_c); + if (device_param->hip_d_bitmap_s2_d) hc_hipMemFree (hashcat_ctx, device_param->hip_d_bitmap_s2_d); + if (device_param->hip_d_plain_bufs) hc_hipMemFree (hashcat_ctx, device_param->hip_d_plain_bufs); + if (device_param->hip_d_digests_buf) hc_hipMemFree (hashcat_ctx, device_param->hip_d_digests_buf); + if (device_param->hip_d_digests_shown) hc_hipMemFree (hashcat_ctx, device_param->hip_d_digests_shown); + if (device_param->hip_d_salt_bufs) hc_hipMemFree (hashcat_ctx, device_param->hip_d_salt_bufs); + if (device_param->hip_d_esalt_bufs) hc_hipMemFree (hashcat_ctx, device_param->hip_d_esalt_bufs); + if (device_param->hip_d_tmps) hc_hipMemFree (hashcat_ctx, device_param->hip_d_tmps); + if (device_param->hip_d_hooks) hc_hipMemFree (hashcat_ctx, device_param->hip_d_hooks); + if (device_param->hip_d_result) hc_hipMemFree (hashcat_ctx, device_param->hip_d_result); + if 
(device_param->hip_d_extra0_buf) hc_hipMemFree (hashcat_ctx, device_param->hip_d_extra0_buf); + if (device_param->hip_d_extra1_buf) hc_hipMemFree (hashcat_ctx, device_param->hip_d_extra1_buf); + if (device_param->hip_d_extra2_buf) hc_hipMemFree (hashcat_ctx, device_param->hip_d_extra2_buf); + if (device_param->hip_d_extra3_buf) hc_hipMemFree (hashcat_ctx, device_param->hip_d_extra3_buf); + if (device_param->hip_d_root_css_buf) hc_hipMemFree (hashcat_ctx, device_param->hip_d_root_css_buf); + if (device_param->hip_d_markov_css_buf) hc_hipMemFree (hashcat_ctx, device_param->hip_d_markov_css_buf); + if (device_param->hip_d_tm_c) hc_hipMemFree (hashcat_ctx, device_param->hip_d_tm_c); + if (device_param->hip_d_st_digests_buf) hc_hipMemFree (hashcat_ctx, device_param->hip_d_st_digests_buf); + if (device_param->hip_d_st_salts_buf) hc_hipMemFree (hashcat_ctx, device_param->hip_d_st_salts_buf); + if (device_param->hip_d_st_esalts_buf) hc_hipMemFree (hashcat_ctx, device_param->hip_d_st_esalts_buf); + + if (device_param->hip_event1) hc_hipEventDestroy (hashcat_ctx, device_param->hip_event1); + if (device_param->hip_event2) hc_hipEventDestroy (hashcat_ctx, device_param->hip_event2); + + if (device_param->hip_stream) hc_hipStreamDestroy (hashcat_ctx, device_param->hip_stream); + + if (device_param->hip_module) hc_hipModuleUnload (hashcat_ctx, device_param->hip_module); + if (device_param->hip_module_mp) hc_hipModuleUnload (hashcat_ctx, device_param->hip_module_mp); + if (device_param->hip_module_amp) hc_hipModuleUnload (hashcat_ctx, device_param->hip_module_amp); + if (device_param->hip_module_shared) hc_hipModuleUnload (hashcat_ctx, device_param->hip_module_shared); + + if (device_param->hip_context) hc_hipCtxDestroy (hashcat_ctx, device_param->hip_context); + + device_param->hip_d_pws_buf = 0; + device_param->hip_d_pws_amp_buf = 0; + device_param->hip_d_pws_comp_buf = 0; + device_param->hip_d_pws_idx = 0; + device_param->hip_d_rules = 0; + device_param->hip_d_rules_c = 0; + 
device_param->hip_d_combs = 0; + device_param->hip_d_combs_c = 0; + device_param->hip_d_bfs = 0; + device_param->hip_d_bfs_c = 0; + device_param->hip_d_bitmap_s1_a = 0; + device_param->hip_d_bitmap_s1_b = 0; + device_param->hip_d_bitmap_s1_c = 0; + device_param->hip_d_bitmap_s1_d = 0; + device_param->hip_d_bitmap_s2_a = 0; + device_param->hip_d_bitmap_s2_b = 0; + device_param->hip_d_bitmap_s2_c = 0; + device_param->hip_d_bitmap_s2_d = 0; + device_param->hip_d_plain_bufs = 0; + device_param->hip_d_digests_buf = 0; + device_param->hip_d_digests_shown = 0; + device_param->hip_d_salt_bufs = 0; + device_param->hip_d_esalt_bufs = 0; + device_param->hip_d_tmps = 0; + device_param->hip_d_hooks = 0; + device_param->hip_d_result = 0; + device_param->hip_d_extra0_buf = 0; + device_param->hip_d_extra1_buf = 0; + device_param->hip_d_extra2_buf = 0; + device_param->hip_d_extra3_buf = 0; + device_param->hip_d_root_css_buf = 0; + device_param->hip_d_markov_css_buf = 0; + device_param->hip_d_tm_c = 0; + device_param->hip_d_st_digests_buf = 0; + device_param->hip_d_st_salts_buf = 0; + device_param->hip_d_st_esalts_buf = 0; + + device_param->hip_function1 = NULL; + device_param->hip_function12 = NULL; + device_param->hip_function2p = NULL; + device_param->hip_function2 = NULL; + device_param->hip_function2e = NULL; + device_param->hip_function23 = NULL; + device_param->hip_function3 = NULL; + device_param->hip_function4 = NULL; + device_param->hip_function_init2 = NULL; + device_param->hip_function_loop2p = NULL; + device_param->hip_function_loop2 = NULL; + device_param->hip_function_mp = NULL; + device_param->hip_function_mp_l = NULL; + device_param->hip_function_mp_r = NULL; + device_param->hip_function_tm = NULL; + device_param->hip_function_amp = NULL; + device_param->hip_function_memset = NULL; + device_param->hip_function_atinit = NULL; + device_param->hip_function_utf8toutf16le = NULL; + device_param->hip_function_decompress = NULL; + device_param->hip_function_aux1 = NULL; + 
device_param->hip_function_aux2 = NULL; + device_param->hip_function_aux3 = NULL; + device_param->hip_function_aux4 = NULL; + + device_param->hip_module = NULL; + device_param->hip_module_mp = NULL; + device_param->hip_module_amp = NULL; + device_param->hip_module_shared = NULL; + + device_param->hip_context = NULL; + } + if (device_param->is_opencl == true) { if (device_param->opencl_d_pws_buf) hc_clReleaseMemObject (hashcat_ctx, device_param->opencl_d_pws_buf); @@ -11849,6 +15666,15 @@ int backend_session_update_mp (hashcat_ctx_t *hashcat_ctx) if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_markov_css_buf, mask_ctx->markov_css_buf, device_param->size_markov_css) == -1) return -1; } + if (device_param->is_hip == true) + { + //for (u32 i = 3; i < 4; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, i, sizeof (cl_ulong), device_param->kernel_params_mp[i]); if (CL_rc == -1) return -1; } + //for (u32 i = 4; i < 8; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, i, sizeof (cl_uint), device_param->kernel_params_mp[i]); if (CL_rc == -1) return -1; } + + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_root_css_buf, mask_ctx->root_css_buf, device_param->size_root_css) == -1) return -1; + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_markov_css_buf, mask_ctx->markov_css_buf, device_param->size_markov_css) == -1) return -1; + } + if (device_param->is_opencl == true) { for (u32 i = 3; i < 4; i++) { if (hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp, i, sizeof (cl_ulong), device_param->kernel_params_mp[i]) == -1) return -1; } @@ -11901,6 +15727,20 @@ int backend_session_update_mp_rl (hashcat_ctx_t *hashcat_ctx, const u32 css_cnt_ if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_markov_css_buf, mask_ctx->markov_css_buf, device_param->size_markov_css) == -1) return -1; } + if (device_param->is_hip == true) + { + //for (u32 i = 3; i < 4; i++) { CL_rc = hc_clSetKernelArg 
(hashcat_ctx, device_param->opencl_kernel_mp_l, i, sizeof (cl_ulong), device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; } + //for (u32 i = 4; i < 8; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, i, sizeof (cl_uint), device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; } + //for (u32 i = 9; i < 9; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, i, sizeof (cl_ulong), device_param->kernel_params_mp_l[i]); if (CL_rc == -1) return -1; } + + //for (u32 i = 3; i < 4; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_r, i, sizeof (cl_ulong), device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; } + //for (u32 i = 4; i < 7; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_r, i, sizeof (cl_uint), device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; } + //for (u32 i = 8; i < 8; i++) { CL_rc = hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_r, i, sizeof (cl_ulong), device_param->kernel_params_mp_r[i]); if (CL_rc == -1) return -1; } + + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_root_css_buf, mask_ctx->root_css_buf, device_param->size_root_css) == -1) return -1; + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_markov_css_buf, mask_ctx->markov_css_buf, device_param->size_markov_css) == -1) return -1; + } + if (device_param->is_opencl == true) { for (u32 i = 3; i < 4; i++) { if (hc_clSetKernelArg (hashcat_ctx, device_param->opencl_kernel_mp_l, i, sizeof (cl_ulong), device_param->kernel_params_mp_l[i]) == -1) return -1; } diff --git a/src/dispatch.c b/src/dispatch.c index 25c40ea7f..d0cbfcfb6 100644 --- a/src/dispatch.c +++ b/src/dispatch.c @@ -350,6 +350,11 @@ HC_API_CALL void *thread_calc_stdin (void *p) if (hc_cuCtxSetCurrent (hashcat_ctx, device_param->cuda_context) == -1) return NULL; } + if (device_param->is_hip == true) + { + if (hc_hipCtxPushCurrent (hashcat_ctx, 
device_param->hip_context) == -1) return NULL; + } + if (calc_stdin (hashcat_ctx, device_param) == -1) { status_ctx_t *status_ctx = hashcat_ctx->status_ctx; @@ -357,6 +362,11 @@ HC_API_CALL void *thread_calc_stdin (void *p) status_ctx->devices_status = STATUS_ERROR; } + if (device_param->is_hip == true) + { + if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL; + } + return NULL; } @@ -1584,6 +1594,11 @@ HC_API_CALL void *thread_calc (void *p) if (hc_cuCtxSetCurrent (hashcat_ctx, device_param->cuda_context) == -1) return NULL; } + if (device_param->is_hip == true) + { + if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return NULL; + } + if (calc (hashcat_ctx, device_param) == -1) { status_ctx_t *status_ctx = hashcat_ctx->status_ctx; @@ -1591,5 +1606,10 @@ HC_API_CALL void *thread_calc (void *p) status_ctx->devices_status = STATUS_ERROR; } + if (device_param->is_hip == true) + { + if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL; + } + return NULL; } diff --git a/src/hashes.c b/src/hashes.c index f1ee22334..27aa94370 100644 --- a/src/hashes.c +++ b/src/hashes.c @@ -322,6 +322,11 @@ void check_hash (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, pl hc_cuMemcpyDtoH (hashcat_ctx, tmps, device_param->cuda_d_tmps + (plain->gidvid * hashconfig->tmp_size), hashconfig->tmp_size); } + if (device_param->is_hip == true) + { + hc_hipMemcpyDtoH (hashcat_ctx, tmps, device_param->hip_d_tmps + (plain->gidvid * hashconfig->tmp_size), hashconfig->tmp_size); + } + if (device_param->is_opencl == true) { hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_tmps, CL_TRUE, plain->gidvid * hashconfig->tmp_size, hashconfig->tmp_size, tmps, 0, NULL, NULL); @@ -481,6 +486,7 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param) u32 num_cracked = 0; int CU_rc; + int HIP_rc; int CL_rc; if (device_param->is_cuda == true) 
@@ -490,6 +496,13 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param) if (CU_rc == -1) return -1; } + if (device_param->is_hip == true) + { + HIP_rc = hc_hipMemcpyDtoH (hashcat_ctx, &num_cracked, device_param->hip_d_result, sizeof (u32)); + + if (HIP_rc == -1) return -1; + } + if (device_param->is_opencl == true) { CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL); @@ -516,6 +529,13 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param) if (CU_rc == -1) return -1; } + if (device_param->is_hip == true) + { + HIP_rc = hc_hipMemcpyDtoH (hashcat_ctx, cracked, device_param->hip_d_plain_bufs, num_cracked * sizeof (plain_t)); + + if (HIP_rc == -1) return -1; + } + if (device_param->is_opencl == true) { CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_plain_bufs, CL_TRUE, 0, num_cracked * sizeof (plain_t), cracked, 0, NULL, NULL); @@ -573,6 +593,13 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param) if (CU_rc == -1) return -1; } + if (device_param->is_hip == true) + { + HIP_rc = hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_digests_shown + (salt_buf->digests_offset * sizeof (u32)), &hashes->digests_shown_tmp[salt_buf->digests_offset], salt_buf->digests_cnt * sizeof (u32)); + + if (HIP_rc == -1) return -1; + } + if (device_param->is_opencl == true) { CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_digests_shown, CL_TRUE, salt_buf->digests_offset * sizeof (u32), salt_buf->digests_cnt * sizeof (u32), &hashes->digests_shown_tmp[salt_buf->digests_offset], 0, NULL, NULL); @@ -611,6 +638,13 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param) if (CU_rc == -1) return -1; } + if (device_param->is_hip == true) + { + HIP_rc = 
hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_result, &num_cracked, sizeof (u32)); + + if (HIP_rc == -1) return -1; + } + if (device_param->is_opencl == true) { CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL); diff --git a/src/selftest.c b/src/selftest.c index 219f6d771..ea08e4a43 100644 --- a/src/selftest.c +++ b/src/selftest.c @@ -32,6 +32,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param device_param->kernel_params[18] = &device_param->cuda_d_st_esalts_buf; } + if (device_param->is_hip == true) + { + device_param->kernel_params[15] = &device_param->hip_d_st_digests_buf; + device_param->kernel_params[17] = &device_param->hip_d_st_salts_buf; + device_param->kernel_params[18] = &device_param->hip_d_st_esalts_buf; + } + if (device_param->is_opencl == true) { device_param->kernel_params[15] = &device_param->opencl_d_st_digests_buf; @@ -91,6 +98,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL) == -1) return -1; @@ -126,6 +138,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1; + } + if (device_param->is_opencl == true) { if 
(hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL) == -1) return -1; @@ -190,6 +207,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_combs_c, &comb, 1 * sizeof (pw_t)) == -1) return -1; + + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_TRUE, 0, 1 * sizeof (pw_t), &comb, 0, NULL, NULL) == -1) return -1; @@ -225,6 +249,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL) == -1) return -1; @@ -276,6 +305,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_bfs_c, &bf, 1 * sizeof (bf_t)) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bfs_c, &bf, 1 * sizeof (bf_t)) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bfs_c, CL_TRUE, 0, 1 * sizeof (bf_t), &bf, 0, 
NULL, NULL) == -1) return -1; @@ -372,6 +406,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL) == -1) return -1; @@ -400,6 +439,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL) == -1) return -1; @@ -446,6 +490,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (run_cuda_kernel_utf8toutf16le (hashcat_ctx, device_param, device_param->cuda_d_pws_buf, 1) == -1) return -1; } + if (device_param->is_hip == true) + { + if (run_hip_kernel_utf8toutf16le (hashcat_ctx, device_param, device_param->hip_d_pws_buf, 1) == -1) return -1; + } + if (device_param->is_opencl == true) { if (run_opencl_kernel_utf8toutf16le (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, 1) == -1) return -1; @@ -463,6 +512,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_cuMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->cuda_d_hooks, device_param->size_hooks) == -1) return -1; } + if (device_param->is_hip == true) + 
{ + if (hc_hipMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->hip_d_hooks, device_param->size_hooks) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1; @@ -475,6 +529,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_hooks, device_param->hooks_buf, device_param->size_hooks) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_hooks, device_param->hooks_buf, device_param->size_hooks) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1; @@ -526,6 +585,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_cuMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->cuda_d_hooks, device_param->size_hooks) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->hip_d_hooks, device_param->size_hooks) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1; @@ -538,6 +602,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_hooks, device_param->hooks_buf, device_param->size_hooks) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyHtoD 
(hashcat_ctx, device_param->hip_d_hooks, device_param->hooks_buf, device_param->size_hooks) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1; @@ -617,6 +686,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (hc_cuMemcpyDtoH (hashcat_ctx, &num_cracked, device_param->cuda_d_result, sizeof (u32)) == -1) return -1; } + if (device_param->is_hip == true) + { + if (hc_hipMemcpyDtoH (hashcat_ctx, &num_cracked, device_param->hip_d_result, sizeof (u32)) == -1) return -1; + } + if (device_param->is_opencl == true) { if (hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL) == -1) return -1; @@ -646,6 +720,20 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_result, device_param->size_results) == -1) return -1; } + if (device_param->is_hip == true) + { + device_param->kernel_params[15] = &device_param->hip_d_digests_buf; + device_param->kernel_params[17] = &device_param->hip_d_salt_bufs; + device_param->kernel_params[18] = &device_param->hip_d_esalt_bufs; + + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_pws_buf, device_param->size_pws) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_tmps, device_param->size_tmps) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_hooks, device_param->size_hooks) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_plain_bufs, device_param->size_plains) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, 
device_param->hip_d_digests_shown, device_param->size_shown) == -1) return -1; + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_result, device_param->size_results) == -1) return -1; + } + if (device_param->is_opencl == true) { device_param->kernel_params[15] = &device_param->opencl_d_digests_buf; @@ -667,6 +755,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_rules_c, device_param->size_rules_c) == -1) return -1; } + if (device_param->is_hip == true) + { + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_rules_c, device_param->size_rules_c) == -1) return -1; + } + if (device_param->is_opencl == true) { if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_rules_c, device_param->size_rules_c) == -1) return -1; @@ -681,6 +774,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_rules_c, device_param->size_rules_c) == -1) return -1; } + if (device_param->is_hip == true) + { + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_rules_c, device_param->size_rules_c) == -1) return -1; + } + if (device_param->is_opencl == true) { if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_rules_c, device_param->size_rules_c) == -1) return -1; @@ -693,6 +791,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_combs_c, device_param->size_combs) == -1) return -1; } + if (device_param->is_hip == true) + { + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_combs_c, device_param->size_combs) == -1) return -1; + } + if (device_param->is_opencl == true) { if (run_opencl_kernel_bzero (hashcat_ctx, device_param, 
device_param->opencl_d_combs_c, device_param->size_combs) == -1) return -1; @@ -705,6 +808,11 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param if (run_cuda_kernel_bzero (hashcat_ctx, device_param, device_param->cuda_d_bfs_c, device_param->size_bfs) == -1) return -1; } + if (device_param->is_hip == true) + { + if (run_hip_kernel_bzero (hashcat_ctx, device_param, device_param->hip_d_bfs_c, device_param->size_bfs) == -1) return -1; + } + if (device_param->is_opencl == true) { if (run_opencl_kernel_bzero (hashcat_ctx, device_param, device_param->opencl_d_bfs_c, device_param->size_bfs) == -1) return -1; @@ -713,19 +821,25 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param } // check return -//TODO: Add HIP in the above test. - if (num_cracked == 0 && false) + + if (num_cracked == 0) { hc_thread_mutex_lock (status_ctx->mux_display); + if (device_param->is_cuda == true) + { + event_log_error (hashcat_ctx, "* Device #%u: ATTENTION! CUDA kernel self-test failed.", device_param->device_id + 1); + } + + if (device_param->is_hip == true) + { + event_log_error (hashcat_ctx, "* Device #%u: ATTENTION! HIP kernel self-test failed.", device_param->device_id + 1); + } + if (device_param->is_opencl == true) { event_log_error (hashcat_ctx, "* Device #%u: ATTENTION! OpenCL kernel self-test failed.", device_param->device_id + 1); } - if (device_param->is_cuda == true) - { - event_log_error (hashcat_ctx, "* Device #%u: ATTENTION! 
CUDA kernel self-test failed.", device_param->device_id + 1); - } event_log_warning (hashcat_ctx, "Your device driver installation is probably broken."); event_log_warning (hashcat_ctx, "See also: https://hashcat.net/faq/wrongdriver"); @@ -735,6 +849,7 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param return -1; } + return 0; } @@ -763,6 +878,11 @@ HC_API_CALL void *thread_selftest (void *p) if (hc_cuCtxSetCurrent (hashcat_ctx, device_param->cuda_context) == -1) return NULL; } + if (device_param->is_hip == true) + { + if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return NULL; + } + const int rc_selftest = selftest (hashcat_ctx, device_param); if (user_options->benchmark == true) @@ -781,5 +901,10 @@ HC_API_CALL void *thread_selftest (void *p) } } + if (device_param->is_hip == true) + { + if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL; + } + return NULL; } From ca3beacd93abe1c30a1bb6b33c8ae391f792156d Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Sun, 11 Jul 2021 14:30:49 +0200 Subject: [PATCH 03/22] Disable dynamic shared memory on HIP, because hipFuncSetAttribute() maps to cudaFuncSetAttribute() and not to cuFuncSetAttribute() --- include/backend.h | 2 +- src/backend.c | 77 ++++++++++++++++++++++++++--------------------- 2 files changed, 44 insertions(+), 35 deletions(-) diff --git a/include/backend.h b/include/backend.h index a024aa37d..a3d997ca8 100644 --- a/include/backend.h +++ b/include/backend.h @@ -112,7 +112,7 @@ int hc_hipEventQuery (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent); int hc_hipEventRecord (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent, HIPstream hStream); int hc_hipEventSynchronize (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent); int hc_hipFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, HIPfunction_attribute attrib, HIPfunction hfunc); -int hc_hipFuncSetAttribute (hashcat_ctx_t *hashcat_ctx, HIPfunction hfunc, HIPfunction_attribute attrib, 
int value); +//int hc_hipFuncSetAttribute (hashcat_ctx_t *hashcat_ctx, HIPfunction hfunc, HIPfunction_attribute attrib, int value); int hc_hipInit (hashcat_ctx_t *hashcat_ctx, unsigned int Flags); int hc_hipLaunchKernel (hashcat_ctx_t *hashcat_ctx, HIPfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, HIPstream hStream, void **kernelParams, void **extra); int hc_hipMemAlloc (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr *dptr, size_t bytesize); diff --git a/src/backend.c b/src/backend.c index 5b12cd2c5..eafaec62e 100644 --- a/src/backend.c +++ b/src/backend.c @@ -2454,7 +2454,7 @@ int hip_init (hashcat_ctx_t *hashcat_ctx) HC_LOAD_FUNC_HIP (hip, hipEventRecord, hipEventRecord, HIP_HIPEVENTRECORD, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipEventSynchronize, hipEventSynchronize, HIP_HIPEVENTSYNCHRONIZE, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipFuncGetAttribute, hipFuncGetAttribute, HIP_HIPFUNCGETATTRIBUTE, HIP, 1); - HC_LOAD_FUNC_HIP (hip, hipFuncSetAttribute, hipFuncSetAttribute, HIP_HIPFUNCSETATTRIBUTE, HIP, 1); + //HC_LOAD_FUNC_HIP (hip, hipFuncSetAttribute, hipFuncSetAttribute, HIP_HIPFUNCSETATTRIBUTE, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipFuncSetCacheConfig, hipFuncSetCacheConfig, HIP_HIPFUNCSETCACHECONFIG, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipFuncSetSharedMemConfig, hipFuncSetSharedMemConfig, HIP_HIPFUNCSETSHAREDMEMCONFIG, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipGetErrorName, hipGetErrorName, HIP_HIPGETERRORNAME, HIP, 1); @@ -3079,6 +3079,10 @@ int hc_hipFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, HIPfunction_att return 0; } +/* + +// ATTENTION, this one maps to cudaFuncSetAttribute not cuFuncSetAttribute !!! 
+ int hc_hipFuncSetAttribute (hashcat_ctx_t *hashcat_ctx, HIPfunction hfunc, HIPfunction_attribute attrib, int value) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; @@ -3105,6 +3109,7 @@ int hc_hipFuncSetAttribute (hashcat_ctx_t *hashcat_ctx, HIPfunction hfunc, HIPfu return 0; } +*/ int hc_hipStreamCreate (hashcat_ctx_t *hashcat_ctx, HIPstream *phStream, unsigned int Flags) { @@ -5474,7 +5479,7 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con case KERN_RUN_AUX4: hip_function = device_param->hip_function_aux4; break; } - if (hc_hipFuncSetAttribute (hashcat_ctx, hip_function, HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, dynamic_shared_mem) == -1) return -1; + //if (hc_hipFuncSetAttribute (hashcat_ctx, hip_function, HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, dynamic_shared_mem) == -1) return -1; } if (kernel_threads == 0) kernel_threads = 1; @@ -10030,6 +10035,9 @@ static int get_hip_kernel_local_mem_size (hashcat_ctx_t *hashcat_ctx, HIPfunctio return 0; } +/* +not supported because there's no cuFuncSetAttribute equivalent + static int get_hip_kernel_dynamic_local_mem_size (hashcat_ctx_t *hashcat_ctx, HIPfunction function, u64 *result) { // AFAIK there's no way to query the maximum value for dynamic shared memory available (because it depends on kernel code). 
@@ -10063,6 +10071,7 @@ static int get_hip_kernel_dynamic_local_mem_size (hashcat_ctx_t *hashcat_ctx, HI return 0; } +*/ static int get_opencl_kernel_wgs (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_kernel kernel, u32 *result) { @@ -11706,7 +11715,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_memset, &device_param->kernel_local_mem_size_memset) == -1) return -1; - if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_memset, &device_param->kernel_dynamic_local_mem_size_memset) == -1) return -1; + //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_memset, &device_param->kernel_dynamic_local_mem_size_memset) == -1) return -1; device_param->kernel_preferred_wgs_multiple_memset = device_param->hip_warp_size; @@ -11722,7 +11731,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_atinit, &device_param->kernel_local_mem_size_atinit) == -1) return -1; - if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_atinit, &device_param->kernel_dynamic_local_mem_size_atinit) == -1) return -1; + //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_atinit, &device_param->kernel_dynamic_local_mem_size_atinit) == -1) return -1; device_param->kernel_preferred_wgs_multiple_atinit = device_param->hip_warp_size; @@ -11737,7 +11746,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_decompress, &device_param->kernel_local_mem_size_decompress) == -1) return -1; - if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_decompress, &device_param->kernel_dynamic_local_mem_size_decompress) == -1) return -1; + //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, 
device_param->hip_function_decompress, &device_param->kernel_dynamic_local_mem_size_decompress) == -1) return -1; device_param->kernel_preferred_wgs_multiple_decompress = device_param->hip_warp_size; @@ -11749,7 +11758,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_utf8toutf16le, &device_param->kernel_local_mem_size_utf8toutf16le) == -1) return -1; - if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_utf8toutf16le, &device_param->kernel_dynamic_local_mem_size_utf8toutf16le) == -1) return -1; + //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_utf8toutf16le, &device_param->kernel_dynamic_local_mem_size_utf8toutf16le) == -1) return -1; device_param->kernel_preferred_wgs_multiple_utf8toutf16le = device_param->hip_warp_size; } @@ -13375,7 +13384,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_local_mem_size1) == -1) return -1; - if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_dynamic_local_mem_size1) == -1) return -1; + //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_dynamic_local_mem_size1) == -1) return -1; device_param->kernel_preferred_wgs_multiple1 = device_param->hip_warp_size; @@ -13389,7 +13398,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_local_mem_size2) == -1) return -1; - if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_dynamic_local_mem_size2) == -1) return -1; + //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_dynamic_local_mem_size2) == -1) return 
-1; device_param->kernel_preferred_wgs_multiple2 = device_param->hip_warp_size; @@ -13403,7 +13412,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_local_mem_size3) == -1) return -1; - if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_dynamic_local_mem_size3) == -1) return -1; + //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_dynamic_local_mem_size3) == -1) return -1; device_param->kernel_preferred_wgs_multiple3 = device_param->hip_warp_size; } @@ -13417,7 +13426,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function4, &device_param->kernel_local_mem_size4) == -1) return -1; - if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function4, &device_param->kernel_dynamic_local_mem_size4) == -1) return -1; + //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function4, &device_param->kernel_dynamic_local_mem_size4) == -1) return -1; device_param->kernel_preferred_wgs_multiple4 = device_param->hip_warp_size; } @@ -13436,7 +13445,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_local_mem_size1) == -1) return -1; - if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_dynamic_local_mem_size1) == -1) return -1; + //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_dynamic_local_mem_size1) == -1) return -1; device_param->kernel_preferred_wgs_multiple1 = device_param->hip_warp_size; @@ -13450,7 +13459,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_hip_kernel_local_mem_size (hashcat_ctx, 
device_param->hip_function2, &device_param->kernel_local_mem_size2) == -1) return -1; - if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_dynamic_local_mem_size2) == -1) return -1; + //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_dynamic_local_mem_size2) == -1) return -1; device_param->kernel_preferred_wgs_multiple2 = device_param->hip_warp_size; @@ -13464,7 +13473,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_local_mem_size3) == -1) return -1; - if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_dynamic_local_mem_size3) == -1) return -1; + //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_dynamic_local_mem_size3) == -1) return -1; device_param->kernel_preferred_wgs_multiple3 = device_param->hip_warp_size; } @@ -13478,7 +13487,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function4, &device_param->kernel_local_mem_size4) == -1) return -1; - if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function4, &device_param->kernel_dynamic_local_mem_size4) == -1) return -1; + //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function4, &device_param->kernel_dynamic_local_mem_size4) == -1) return -1; device_param->kernel_preferred_wgs_multiple4 = device_param->hip_warp_size; } @@ -13501,7 +13510,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_tm, &device_param->kernel_local_mem_size_tm) == -1) return -1; - if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_tm, 
&device_param->kernel_dynamic_local_mem_size_tm) == -1) return -1; + //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_tm, &device_param->kernel_dynamic_local_mem_size_tm) == -1) return -1; device_param->kernel_preferred_wgs_multiple_tm = device_param->hip_warp_size; } @@ -13520,7 +13529,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_local_mem_size1) == -1) return -1; - if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_dynamic_local_mem_size1) == -1) return -1; + //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function1, &device_param->kernel_dynamic_local_mem_size1) == -1) return -1; device_param->kernel_preferred_wgs_multiple1 = device_param->hip_warp_size; @@ -13534,7 +13543,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_local_mem_size2) == -1) return -1; - if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_dynamic_local_mem_size2) == -1) return -1; + //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function2, &device_param->kernel_dynamic_local_mem_size2) == -1) return -1; device_param->kernel_preferred_wgs_multiple2 = device_param->hip_warp_size; @@ -13548,7 +13557,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_local_mem_size3) == -1) return -1; - if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_dynamic_local_mem_size3) == -1) return -1; + //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function3, &device_param->kernel_dynamic_local_mem_size3) == 
-1) return -1; device_param->kernel_preferred_wgs_multiple3 = device_param->hip_warp_size; @@ -13564,7 +13573,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2p, &device_param->kernel_local_mem_size2p) == -1) return -1; - if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function2p, &device_param->kernel_dynamic_local_mem_size2p) == -1) return -1; + //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function2p, &device_param->kernel_dynamic_local_mem_size2p) == -1) return -1; device_param->kernel_preferred_wgs_multiple2p = device_param->hip_warp_size; } @@ -13581,7 +13590,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function2e, &device_param->kernel_local_mem_size2e) == -1) return -1; - if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function2e, &device_param->kernel_dynamic_local_mem_size2e) == -1) return -1; + //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function2e, &device_param->kernel_dynamic_local_mem_size2e) == -1) return -1; device_param->kernel_preferred_wgs_multiple2e = device_param->hip_warp_size; } @@ -13598,7 +13607,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function12, &device_param->kernel_local_mem_size12) == -1) return -1; - if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function12, &device_param->kernel_dynamic_local_mem_size12) == -1) return -1; + //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function12, &device_param->kernel_dynamic_local_mem_size12) == -1) return -1; device_param->kernel_preferred_wgs_multiple12 = device_param->hip_warp_size; } @@ -13615,7 +13624,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if 
(get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function23, &device_param->kernel_local_mem_size23) == -1) return -1; - if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function23, &device_param->kernel_dynamic_local_mem_size23) == -1) return -1; + //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function23, &device_param->kernel_dynamic_local_mem_size23) == -1) return -1; device_param->kernel_preferred_wgs_multiple23 = device_param->hip_warp_size; } @@ -13632,7 +13641,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_init2, &device_param->kernel_local_mem_size_init2) == -1) return -1; - if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_init2, &device_param->kernel_dynamic_local_mem_size_init2) == -1) return -1; + //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_init2, &device_param->kernel_dynamic_local_mem_size_init2) == -1) return -1; device_param->kernel_preferred_wgs_multiple_init2 = device_param->hip_warp_size; } @@ -13649,7 +13658,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_loop2p, &device_param->kernel_local_mem_size_loop2p) == -1) return -1; - if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_loop2p, &device_param->kernel_dynamic_local_mem_size_loop2p) == -1) return -1; + //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_loop2p, &device_param->kernel_dynamic_local_mem_size_loop2p) == -1) return -1; device_param->kernel_preferred_wgs_multiple_loop2p = device_param->hip_warp_size; } @@ -13666,7 +13675,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_loop2, &device_param->kernel_local_mem_size_loop2) 
== -1) return -1; - if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_loop2, &device_param->kernel_dynamic_local_mem_size_loop2) == -1) return -1; + //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_loop2, &device_param->kernel_dynamic_local_mem_size_loop2) == -1) return -1; device_param->kernel_preferred_wgs_multiple_loop2 = device_param->hip_warp_size; } @@ -13683,7 +13692,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_aux1, &device_param->kernel_local_mem_size_aux1) == -1) return -1; - if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_aux1, &device_param->kernel_dynamic_local_mem_size_aux1) == -1) return -1; + //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_aux1, &device_param->kernel_dynamic_local_mem_size_aux1) == -1) return -1; device_param->kernel_preferred_wgs_multiple_aux1 = device_param->hip_warp_size; } @@ -13700,7 +13709,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_aux2, &device_param->kernel_local_mem_size_aux2) == -1) return -1; - if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_aux2, &device_param->kernel_dynamic_local_mem_size_aux2) == -1) return -1; + //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_aux2, &device_param->kernel_dynamic_local_mem_size_aux2) == -1) return -1; device_param->kernel_preferred_wgs_multiple_aux2 = device_param->hip_warp_size; } @@ -13717,7 +13726,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_aux3, &device_param->kernel_local_mem_size_aux3) == -1) return -1; - if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_aux3, 
&device_param->kernel_dynamic_local_mem_size_aux3) == -1) return -1; + //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_aux3, &device_param->kernel_dynamic_local_mem_size_aux3) == -1) return -1; device_param->kernel_preferred_wgs_multiple_aux3 = device_param->hip_warp_size; } @@ -13734,7 +13743,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_aux4, &device_param->kernel_local_mem_size_aux4) == -1) return -1; - if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_aux4, &device_param->kernel_dynamic_local_mem_size_aux4) == -1) return -1; + //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_aux4, &device_param->kernel_dynamic_local_mem_size_aux4) == -1) return -1; device_param->kernel_preferred_wgs_multiple_aux4 = device_param->hip_warp_size; } @@ -13762,7 +13771,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_mp_l, &device_param->kernel_local_mem_size_mp_l) == -1) return -1; - if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_mp_l, &device_param->kernel_dynamic_local_mem_size_mp_l) == -1) return -1; + //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_mp_l, &device_param->kernel_dynamic_local_mem_size_mp_l) == -1) return -1; device_param->kernel_preferred_wgs_multiple_mp_l = device_param->hip_warp_size; @@ -13774,7 +13783,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_mp_r, &device_param->kernel_local_mem_size_mp_r) == -1) return -1; - if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_mp_r, &device_param->kernel_dynamic_local_mem_size_mp_r) == -1) return -1; + //if (get_hip_kernel_dynamic_local_mem_size 
(hashcat_ctx, device_param->hip_function_mp_r, &device_param->kernel_dynamic_local_mem_size_mp_r) == -1) return -1; device_param->kernel_preferred_wgs_multiple_mp_r = device_param->hip_warp_size; @@ -13795,7 +13804,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_local_mem_size_mp) == -1) return -1; - if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_dynamic_local_mem_size_mp) == -1) return -1; + //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_dynamic_local_mem_size_mp) == -1) return -1; device_param->kernel_preferred_wgs_multiple_mp = device_param->hip_warp_size; } @@ -13807,7 +13816,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_local_mem_size_mp) == -1) return -1; - if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_dynamic_local_mem_size_mp) == -1) return -1; + //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_mp, &device_param->kernel_dynamic_local_mem_size_mp) == -1) return -1; device_param->kernel_preferred_wgs_multiple_mp = device_param->hip_warp_size; } @@ -13830,7 +13839,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_hip_kernel_local_mem_size (hashcat_ctx, device_param->hip_function_amp, &device_param->kernel_local_mem_size_amp) == -1) return -1; - if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_amp, &device_param->kernel_dynamic_local_mem_size_amp) == -1) return -1; + //if (get_hip_kernel_dynamic_local_mem_size (hashcat_ctx, device_param->hip_function_amp, &device_param->kernel_dynamic_local_mem_size_amp) == -1) return -1; 
device_param->kernel_preferred_wgs_multiple_amp = device_param->hip_warp_size; } From fde47702196a22900473750b0d9a57cfdab1be00 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Sun, 11 Jul 2021 14:47:48 +0200 Subject: [PATCH 04/22] Add missing HIP backend information in -I output --- src/terminal.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/src/terminal.c b/src/terminal.c index 89f69d6e5..e99836032 100644 --- a/src/terminal.c +++ b/src/terminal.c @@ -810,6 +810,54 @@ void backend_info (hashcat_ctx_t *hashcat_ctx) } } + if (backend_ctx->hip) + { + event_log_info (hashcat_ctx, "HIP Info:"); + event_log_info (hashcat_ctx, "========="); + event_log_info (hashcat_ctx, NULL); + + int hip_devices_cnt = backend_ctx->hip_devices_cnt; + int hip_driver_version = backend_ctx->hip_driver_version; + + event_log_info (hashcat_ctx, "HIP.Version.: %d.%d", hip_driver_version / 1000, (hip_driver_version % 100) / 10); + event_log_info (hashcat_ctx, NULL); + + for (int hip_devices_idx = 0; hip_devices_idx < hip_devices_cnt; hip_devices_idx++) + { + const int backend_devices_idx = backend_ctx->backend_device_from_hip[hip_devices_idx]; + + const hc_device_param_t *device_param = backend_ctx->devices_param + backend_devices_idx; + + int device_id = device_param->device_id; + char *device_name = device_param->device_name; + u32 device_processors = device_param->device_processors; + u32 device_maxclock_frequency = device_param->device_maxclock_frequency; + u64 device_available_mem = device_param->device_available_mem; + u64 device_global_mem = device_param->device_global_mem; + u8 pcie_domain = device_param->pcie_domain; + u8 pcie_bus = device_param->pcie_bus; + u8 pcie_device = device_param->pcie_device; + u8 pcie_function = device_param->pcie_function; + + if (device_param->device_id_alias_cnt) + { + event_log_info (hashcat_ctx, "Backend Device ID #%d (Alias: #%d)", device_id + 1, device_param->device_id_alias_buf[0] + 1); + } + 
else + { + event_log_info (hashcat_ctx, "Backend Device ID #%d", device_id + 1); + } + + event_log_info (hashcat_ctx, " Name...........: %s", device_name); + event_log_info (hashcat_ctx, " Processor(s)...: %u", device_processors); + event_log_info (hashcat_ctx, " Clock..........: %u", device_maxclock_frequency); + event_log_info (hashcat_ctx, " Memory.Total...: %" PRIu64 " MB", device_global_mem / 1024 / 1024); + event_log_info (hashcat_ctx, " Memory.Free....: %" PRIu64 " MB", device_available_mem / 1024 / 1024); + event_log_info (hashcat_ctx, " PCI.Addr.BDFe..: %04x:%02x:%02x.%d", (u16) pcie_domain, pcie_bus, pcie_device, pcie_function); + event_log_info (hashcat_ctx, NULL); + } + } + if (backend_ctx->ocl) { event_log_info (hashcat_ctx, "OpenCL Info:"); From 20f7febd4c392582c0e925d8201e250e821feef1 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Sun, 11 Jul 2021 15:54:13 +0200 Subject: [PATCH 05/22] Workaround too intensive optimization in -m 2000 using HIPRTC --- OpenCL/inc_platform.cl | 2 +- src/terminal.c | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/OpenCL/inc_platform.cl b/OpenCL/inc_platform.cl index 40002c3eb..1a63e7c3a 100644 --- a/OpenCL/inc_platform.cl +++ b/OpenCL/inc_platform.cl @@ -168,7 +168,7 @@ DECLSPEC u64 rotr64_S (const u64 a, const int n) #if ATTACK_EXEC == 11 -CONSTANT_VK u32 generic_constant[8192]; // 32k +CONSTANT_VK u32 generic_constant[8192] __attribute__((used)); // 32k #if ATTACK_KERN == 0 #define bfs_buf g_bfs_buf diff --git a/src/terminal.c b/src/terminal.c index e99836032..54bffed10 100644 --- a/src/terminal.c +++ b/src/terminal.c @@ -956,6 +956,10 @@ void backend_info_compact (hashcat_ctx_t *hashcat_ctx) if (user_options->machine_readable == true) return; if (user_options->status_json == true) return; + /** + * CUDA + */ + if (backend_ctx->cuda) { int cuda_devices_cnt = backend_ctx->cuda_devices_cnt; @@ -1003,9 +1007,10 @@ void backend_info_compact (hashcat_ctx_t *hashcat_ctx) event_log_info 
(hashcat_ctx, NULL); } - /* - * HIP - */ + /** + * HIP + */ + if (backend_ctx->hip) { int hip_devices_cnt = backend_ctx->hip_devices_cnt; @@ -1053,9 +1058,10 @@ void backend_info_compact (hashcat_ctx_t *hashcat_ctx) event_log_info (hashcat_ctx, NULL); } - /* - * OCL - */ + /** + * OpenCL + */ + if (backend_ctx->ocl) { cl_uint opencl_platforms_cnt = backend_ctx->opencl_platforms_cnt; From 23c3c178bff9349daa144d8245c54805c74aabc7 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Mon, 12 Jul 2021 09:28:26 +0200 Subject: [PATCH 06/22] Limit max threads per block to 64 to enable offline compiler to make better use of available registers Fix double free() for hip_event1/hip_event2 and hip_stream causes segfault Replace hc_cuCtxSetCurrent() with hc_cuCtxPushCurrent() in order to align changes with HIP Add vector datatype operators (if we decide to use them - currently unused) --- OpenCL/inc_types.h | 760 +++++++++++++++++++++++++++++++++++++ src/autotune.c | 9 +- src/backend.c | 46 ++- src/dispatch.c | 14 +- src/modules/module_17200.c | 5 + src/modules/module_17220.c | 5 + src/modules/module_17225.c | 5 + src/modules/module_19600.c | 5 + src/modules/module_19700.c | 5 + src/selftest.c | 7 +- 10 files changed, 853 insertions(+), 8 deletions(-) diff --git a/OpenCL/inc_types.h b/OpenCL/inc_types.h index 1041a0d7f..cbd2fde69 100644 --- a/OpenCL/inc_types.h +++ b/OpenCL/inc_types.h @@ -824,6 +824,766 @@ typedef __device_builtin__ struct u64x u64x; #define make_u32x u32x #define make_u64x u64x +#elif defined IS_HIP + +// seems to work, but slow + +/* +#if VECT_SIZE == 2 + +struct u8x +{ + u8 s0; + u8 s1; + + inline __device__ u8x (const u8 a, const u8 b) : s0(a), s1(b) { } + inline __device__ u8x (const u8 a) : s0(a), s1(a) { } + + inline __device__ u8x (void) : s0(0), s1(0) { } + inline __device__ ~u8x (void) { } +}; + +struct u16x +{ + u16 s0; + u16 s1; + + inline __device__ u16x (const u16 a, const u16 b) : s0(a), s1(b) { } + inline __device__ u16x (const u16 a) : s0(a), 
s1(a) { } + + inline __device__ u16x (void) : s0(0), s1(0) { } + inline __device__ ~u16x (void) { } +}; + +struct u32x +{ + u32 s0; + u32 s1; + + inline __device__ u32x (const u32 a, const u32 b) : s0(a), s1(b) { } + inline __device__ u32x (const u32 a) : s0(a), s1(a) { } + + inline __device__ u32x (void) : s0(0), s1(0) { } + inline __device__ ~u32x (void) { } +}; + +struct u64x +{ + u64 s0; + u64 s1; + + inline __device__ u64x (const u64 a, const u64 b) : s0(a), s1(b) { } + inline __device__ u64x (const u64 a) : s0(a), s1(a) { } + + inline __device__ u64x (void) : s0(0), s1(0) { } + inline __device__ ~u64x (void) { } +}; + +inline __device__ bool operator != (const u32x a, const u32 b) { return ((a.s0 != b) && (a.s1 != b)); } +inline __device__ bool operator != (const u32x a, const u32x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1)); } + +inline __device__ void operator ^= (u32x &a, const u32 b) { a.s0 ^= b; a.s1 ^= b; } +inline __device__ void operator ^= (u32x &a, const u32x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; } + +inline __device__ void operator |= (u32x &a, const u32 b) { a.s0 |= b; a.s1 |= b; } +inline __device__ void operator |= (u32x &a, const u32x b) { a.s0 |= b.s0; a.s1 |= b.s1; } + +inline __device__ void operator &= (u32x &a, const u32 b) { a.s0 &= b; a.s1 &= b; } +inline __device__ void operator &= (u32x &a, const u32x b) { a.s0 &= b.s0; a.s1 &= b.s1; } + +inline __device__ void operator += (u32x &a, const u32 b) { a.s0 += b; a.s1 += b; } +inline __device__ void operator += (u32x &a, const u32x b) { a.s0 += b.s0; a.s1 += b.s1; } + +inline __device__ void operator -= (u32x &a, const u32 b) { a.s0 -= b; a.s1 -= b; } +inline __device__ void operator -= (u32x &a, const u32x b) { a.s0 -= b.s0; a.s1 -= b.s1; } + +inline __device__ void operator *= (u32x &a, const u32 b) { a.s0 *= b; a.s1 *= b; } +inline __device__ void operator *= (u32x &a, const u32x b) { a.s0 *= b.s0; a.s1 *= b.s1; } + +inline __device__ void operator >>= (u32x &a, const u32 b) { a.s0 >>= b; 
a.s1 >>= b; } +inline __device__ void operator >>= (u32x &a, const u32x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; } + +inline __device__ void operator <<= (u32x &a, const u32 b) { a.s0 <<= b; a.s1 <<= b; } +inline __device__ void operator <<= (u32x &a, const u32x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; } + +inline __device__ u32x operator << (const u32x a, const u32 b) { return u32x ((a.s0 << b), (a.s1 << b) ); } +inline __device__ u32x operator << (const u32x a, const u32x b) { return u32x ((a.s0 << b.s0), (a.s1 << b.s1)); } + +inline __device__ u32x operator >> (const u32x a, const u32 b) { return u32x ((a.s0 >> b), (a.s1 >> b) ); } +inline __device__ u32x operator >> (const u32x a, const u32x b) { return u32x ((a.s0 >> b.s0), (a.s1 >> b.s1)); } + +inline __device__ u32x operator ^ (const u32x a, const u32 b) { return u32x ((a.s0 ^ b), (a.s1 ^ b) ); } +inline __device__ u32x operator ^ (const u32x a, const u32x b) { return u32x ((a.s0 ^ b.s0), (a.s1 ^ b.s1)); } + +inline __device__ u32x operator | (const u32x a, const u32 b) { return u32x ((a.s0 | b), (a.s1 | b) ); } +inline __device__ u32x operator | (const u32x a, const u32x b) { return u32x ((a.s0 | b.s0), (a.s1 | b.s1)); } + +inline __device__ u32x operator & (const u32x a, const u32 b) { return u32x ((a.s0 & b), (a.s1 & b) ); } +inline __device__ u32x operator & (const u32x a, const u32x b) { return u32x ((a.s0 & b.s0), (a.s1 & b.s1)); } + +inline __device__ u32x operator + (const u32x a, const u32 b) { return u32x ((a.s0 + b), (a.s1 + b) ); } +inline __device__ u32x operator + (const u32x a, const u32x b) { return u32x ((a.s0 + b.s0), (a.s1 + b.s1)); } + +inline __device__ u32x operator - (const u32x a, const u32 b) { return u32x ((a.s0 - b), (a.s1 - b) ); } +inline __device__ u32x operator - (const u32x a, const u32x b) { return u32x ((a.s0 - b.s0), (a.s1 - b.s1)); } + +inline __device__ u32x operator * (const u32x a, const u32 b) { return u32x ((a.s0 * b), (a.s1 * b) ); } +inline __device__ u32x operator * (const 
u32x a, const u32x b) { return u32x ((a.s0 * b.s0), (a.s1 * b.s1)); } + +inline __device__ u32x operator % (const u32x a, const u32 b) { return u32x ((a.s0 % b), (a.s1 % b) ); } +inline __device__ u32x operator % (const u32x a, const u32x b) { return u32x ((a.s0 % b.s0), (a.s1 % b.s1)); } + +inline __device__ u32x operator ~ (const u32x a) { return u32x (~a.s0, ~a.s1); } + +inline __device__ bool operator != (const u64x a, const u64 b) { return ((a.s0 != b) && (a.s1 != b)); } +inline __device__ bool operator != (const u64x a, const u64x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1)); } + +inline __device__ void operator ^= (u64x &a, const u64 b) { a.s0 ^= b; a.s1 ^= b; } +inline __device__ void operator ^= (u64x &a, const u64x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; } + +inline __device__ void operator |= (u64x &a, const u64 b) { a.s0 |= b; a.s1 |= b; } +inline __device__ void operator |= (u64x &a, const u64x b) { a.s0 |= b.s0; a.s1 |= b.s1; } + +inline __device__ void operator &= (u64x &a, const u64 b) { a.s0 &= b; a.s1 &= b; } +inline __device__ void operator &= (u64x &a, const u64x b) { a.s0 &= b.s0; a.s1 &= b.s1; } + +inline __device__ void operator += (u64x &a, const u64 b) { a.s0 += b; a.s1 += b; } +inline __device__ void operator += (u64x &a, const u64x b) { a.s0 += b.s0; a.s1 += b.s1; } + +inline __device__ void operator -= (u64x &a, const u64 b) { a.s0 -= b; a.s1 -= b; } +inline __device__ void operator -= (u64x &a, const u64x b) { a.s0 -= b.s0; a.s1 -= b.s1; } + +inline __device__ void operator *= (u64x &a, const u64 b) { a.s0 *= b; a.s1 *= b; } +inline __device__ void operator *= (u64x &a, const u64x b) { a.s0 *= b.s0; a.s1 *= b.s1; } + +inline __device__ void operator >>= (u64x &a, const u64 b) { a.s0 >>= b; a.s1 >>= b; } +inline __device__ void operator >>= (u64x &a, const u64x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; } + +inline __device__ void operator <<= (u64x &a, const u64 b) { a.s0 <<= b; a.s1 <<= b; } +inline __device__ void operator <<= (u64x &a, const u64x 
b) { a.s0 <<= b.s0; a.s1 <<= b.s1; } + +inline __device__ u64x operator << (const u64x a, const u64 b) { return u64x ((a.s0 << b), (a.s1 << b) ); } +inline __device__ u64x operator << (const u64x a, const u64x b) { return u64x ((a.s0 << b.s0), (a.s1 << b.s1)); } + +inline __device__ u64x operator >> (const u64x a, const u64 b) { return u64x ((a.s0 >> b), (a.s1 >> b) ); } +inline __device__ u64x operator >> (const u64x a, const u64x b) { return u64x ((a.s0 >> b.s0), (a.s1 >> b.s1)); } + +inline __device__ u64x operator ^ (const u64x a, const u64 b) { return u64x ((a.s0 ^ b), (a.s1 ^ b) ); } +inline __device__ u64x operator ^ (const u64x a, const u64x b) { return u64x ((a.s0 ^ b.s0), (a.s1 ^ b.s1)); } + +inline __device__ u64x operator | (const u64x a, const u64 b) { return u64x ((a.s0 | b), (a.s1 | b) ); } +inline __device__ u64x operator | (const u64x a, const u64x b) { return u64x ((a.s0 | b.s0), (a.s1 | b.s1)); } + +inline __device__ u64x operator & (const u64x a, const u64 b) { return u64x ((a.s0 & b), (a.s1 & b) ); } +inline __device__ u64x operator & (const u64x a, const u64x b) { return u64x ((a.s0 & b.s0), (a.s1 & b.s1)); } + +inline __device__ u64x operator + (const u64x a, const u64 b) { return u64x ((a.s0 + b), (a.s1 + b) ); } +inline __device__ u64x operator + (const u64x a, const u64x b) { return u64x ((a.s0 + b.s0), (a.s1 + b.s1)); } + +inline __device__ u64x operator - (const u64x a, const u64 b) { return u64x ((a.s0 - b), (a.s1 - b) ); } +inline __device__ u64x operator - (const u64x a, const u64x b) { return u64x ((a.s0 - b.s0), (a.s1 - b.s1)); } + +inline __device__ u64x operator * (const u64x a, const u64 b) { return u64x ((a.s0 * b), (a.s1 * b) ); } +inline __device__ u64x operator * (const u64x a, const u64x b) { return u64x ((a.s0 * b.s0), (a.s1 * b.s1)); } + +inline __device__ u64x operator % (const u64x a, const u64 b) { return u64x ((a.s0 % b), (a.s1 % b) ); } +inline __device__ u64x operator % (const u64x a, const u64x b) { return u64x 
((a.s0 % b.s0), (a.s1 % b.s1)); } + +inline __device__ u64x operator ~ (const u64x a) { return u64x (~a.s0, ~a.s1); } + +#endif + +#if VECT_SIZE == 4 + +struct u8x +{ + u8 s0; + u8 s1; + u8 s2; + u8 s3; + + inline __device__ u8x (const u8 a, const u8 b, const u8 c, const u8 d) : s0(a), s1(b), s2(c), s3(d) { } + inline __device__ u8x (const u8 a) : s0(a), s1(a), s2(a), s3(a) { } + + inline __device__ u8x (void) : s0(0), s1(0), s2(0), s3(0) { } + inline __device__ ~u8x (void) { } +}; + +struct u16x +{ + u16 s0; + u16 s1; + u16 s2; + u16 s3; + + inline __device__ u16x (const u16 a, const u16 b, const u16 c, const u16 d) : s0(a), s1(b), s2(c), s3(d) { } + inline __device__ u16x (const u16 a) : s0(a), s1(a), s2(a), s3(a) { } + + inline __device__ u16x (void) : s0(0), s1(0), s2(0), s3(0) { } + inline __device__ ~u16x (void) { } +}; + +struct u32x +{ + u32 s0; + u32 s1; + u32 s2; + u32 s3; + + inline __device__ u32x (const u32 a, const u32 b, const u32 c, const u32 d) : s0(a), s1(b), s2(c), s3(d) { } + inline __device__ u32x (const u32 a) : s0(a), s1(a), s2(a), s3(a) { } + + inline __device__ u32x (void) : s0(0), s1(0), s2(0), s3(0) { } + inline __device__ ~u32x (void) { } +}; + +struct u64x +{ + u64 s0; + u64 s1; + u64 s2; + u64 s3; + + inline __device__ u64x (const u64 a, const u64 b, const u64 c, const u64 d) : s0(a), s1(b), s2(c), s3(d) { } + inline __device__ u64x (const u64 a) : s0(a), s1(a), s2(a), s3(a) { } + + inline __device__ u64x (void) : s0(0), s1(0), s2(0), s3(0) { } + inline __device__ ~u64x (void) { } +}; + +inline __device__ bool operator != (const u32x a, const u32 b) { return ((a.s0 != b) && (a.s1 != b) && (a.s2 != b) && (a.s3 != b) ); } +inline __device__ bool operator != (const u32x a, const u32x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3)); } + +inline __device__ void operator ^= (u32x &a, const u32 b) { a.s0 ^= b; a.s1 ^= b; a.s2 ^= b; a.s3 ^= b; } +inline __device__ void operator ^= (u32x &a, const u32x b) { 
a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3; } + +inline __device__ void operator |= (u32x &a, const u32 b) { a.s0 |= b; a.s1 |= b; a.s2 |= b; a.s3 |= b; } +inline __device__ void operator |= (u32x &a, const u32x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3; } + +inline __device__ void operator &= (u32x &a, const u32 b) { a.s0 &= b; a.s1 &= b; a.s2 &= b; a.s3 &= b; } +inline __device__ void operator &= (u32x &a, const u32x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3; } + +inline __device__ void operator += (u32x &a, const u32 b) { a.s0 += b; a.s1 += b; a.s2 += b; a.s3 += b; } +inline __device__ void operator += (u32x &a, const u32x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3; } + +inline __device__ void operator -= (u32x &a, const u32 b) { a.s0 -= b; a.s1 -= b; a.s2 -= b; a.s3 -= b; } +inline __device__ void operator -= (u32x &a, const u32x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3; } + +inline __device__ void operator *= (u32x &a, const u32 b) { a.s0 *= b; a.s1 *= b; a.s2 *= b; a.s3 *= b; } +inline __device__ void operator *= (u32x &a, const u32x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3; } + +inline __device__ void operator >>= (u32x &a, const u32 b) { a.s0 >>= b; a.s1 >>= b; a.s2 >>= b; a.s3 >>= b; } +inline __device__ void operator >>= (u32x &a, const u32x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3; } + +inline __device__ void operator <<= (u32x &a, const u32 b) { a.s0 <<= b; a.s1 <<= b; a.s2 <<= b; a.s3 <<= b; } +inline __device__ void operator <<= (u32x &a, const u32x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3; } + +inline __device__ u32x operator << (const u32x a, const u32 b) { return u32x ((a.s0 << b), (a.s1 << b) , (a.s2 << b), (a.s3 << b) ); } +inline __device__ u32x operator << (const u32x a, const u32x b) { return u32x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3)); } + +inline __device__ u32x 
operator >> (const u32x a, const u32 b) { return u32x ((a.s0 >> b), (a.s1 >> b) , (a.s2 >> b), (a.s3 >> b) ); } +inline __device__ u32x operator >> (const u32x a, const u32x b) { return u32x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3)); } + +inline __device__ u32x operator ^ (const u32x a, const u32 b) { return u32x ((a.s0 ^ b), (a.s1 ^ b) , (a.s2 ^ b), (a.s3 ^ b) ); } +inline __device__ u32x operator ^ (const u32x a, const u32x b) { return u32x ((a.s0 ^ b.s0), (a.s1 ^ b.s1), (a.s2 ^ b.s2), (a.s3 ^ b.s3)); } + +inline __device__ u32x operator | (const u32x a, const u32 b) { return u32x ((a.s0 | b), (a.s1 | b) , (a.s2 | b), (a.s3 | b) ); } +inline __device__ u32x operator | (const u32x a, const u32x b) { return u32x ((a.s0 | b.s0), (a.s1 | b.s1), (a.s2 | b.s2), (a.s3 | b.s3)); } + +inline __device__ u32x operator & (const u32x a, const u32 b) { return u32x ((a.s0 & b), (a.s1 & b) , (a.s2 & b), (a.s3 & b) ); } +inline __device__ u32x operator & (const u32x a, const u32x b) { return u32x ((a.s0 & b.s0), (a.s1 & b.s1), (a.s2 & b.s2), (a.s3 & b.s3)); } + +inline __device__ u32x operator + (const u32x a, const u32 b) { return u32x ((a.s0 + b), (a.s1 + b) , (a.s2 + b), (a.s3 + b) ); } +inline __device__ u32x operator + (const u32x a, const u32x b) { return u32x ((a.s0 + b.s0), (a.s1 + b.s1), (a.s2 + b.s2), (a.s3 + b.s3)); } + +inline __device__ u32x operator - (const u32x a, const u32 b) { return u32x ((a.s0 - b), (a.s1 - b) , (a.s2 - b), (a.s3 - b) ); } +inline __device__ u32x operator - (const u32x a, const u32x b) { return u32x ((a.s0 - b.s0), (a.s1 - b.s1), (a.s2 - b.s2), (a.s3 - b.s3)); } + +inline __device__ u32x operator * (const u32x a, const u32 b) { return u32x ((a.s0 * b), (a.s1 * b) , (a.s2 * b), (a.s3 * b) ); } +inline __device__ u32x operator * (const u32x a, const u32x b) { return u32x ((a.s0 * b.s0), (a.s1 * b.s1), (a.s2 * b.s2), (a.s3 * b.s3)); } + +inline __device__ u32x operator % (const u32x a, const u32 b) { return u32x ((a.s0 % 
b), (a.s1 % b) , (a.s2 % b), (a.s3 % b) ); } +inline __device__ u32x operator % (const u32x a, const u32x b) { return u32x ((a.s0 % b.s0), (a.s1 % b.s1), (a.s2 % b.s2), (a.s3 % b.s3)); } + +inline __device__ u32x operator ~ (const u32x a) { return u32x (~a.s0, ~a.s1, ~a.s2, ~a.s3); } + +inline __device__ bool operator != (const u64x a, const u64 b) { return ((a.s0 != b) && (a.s1 != b) && (a.s2 != b) && (a.s3 != b) ); } +inline __device__ bool operator != (const u64x a, const u64x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3)); } + +inline __device__ void operator ^= (u64x &a, const u64 b) { a.s0 ^= b; a.s1 ^= b; a.s2 ^= b; a.s3 ^= b; } +inline __device__ void operator ^= (u64x &a, const u64x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3; } + +inline __device__ void operator |= (u64x &a, const u64 b) { a.s0 |= b; a.s1 |= b; a.s2 |= b; a.s3 |= b; } +inline __device__ void operator |= (u64x &a, const u64x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3; } + +inline __device__ void operator &= (u64x &a, const u64 b) { a.s0 &= b; a.s1 &= b; a.s2 &= b; a.s3 &= b; } +inline __device__ void operator &= (u64x &a, const u64x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3; } + +inline __device__ void operator += (u64x &a, const u64 b) { a.s0 += b; a.s1 += b; a.s2 += b; a.s3 += b; } +inline __device__ void operator += (u64x &a, const u64x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3; } + +inline __device__ void operator -= (u64x &a, const u64 b) { a.s0 -= b; a.s1 -= b; a.s2 -= b; a.s3 -= b; } +inline __device__ void operator -= (u64x &a, const u64x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3; } + +inline __device__ void operator *= (u64x &a, const u64 b) { a.s0 *= b; a.s1 *= b; a.s2 *= b; a.s3 *= b; } +inline __device__ void operator *= (u64x &a, const u64x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3; } + +inline __device__ void operator >>= (u64x &a, const 
u64 b) { a.s0 >>= b; a.s1 >>= b; a.s2 >>= b; a.s3 >>= b; } +inline __device__ void operator >>= (u64x &a, const u64x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3; } + +inline __device__ void operator <<= (u64x &a, const u64 b) { a.s0 <<= b; a.s1 <<= b; a.s2 <<= b; a.s3 <<= b; } +inline __device__ void operator <<= (u64x &a, const u64x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3; } + +inline __device__ u64x operator << (const u64x a, const u64 b) { return u64x ((a.s0 << b), (a.s1 << b) , (a.s2 << b), (a.s3 << b) ); } +inline __device__ u64x operator << (const u64x a, const u64x b) { return u64x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3)); } + +inline __device__ u64x operator >> (const u64x a, const u64 b) { return u64x ((a.s0 >> b), (a.s1 >> b) , (a.s2 >> b), (a.s3 >> b) ); } +inline __device__ u64x operator >> (const u64x a, const u64x b) { return u64x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3)); } + +inline __device__ u64x operator ^ (const u64x a, const u64 b) { return u64x ((a.s0 ^ b), (a.s1 ^ b) , (a.s2 ^ b), (a.s3 ^ b) ); } +inline __device__ u64x operator ^ (const u64x a, const u64x b) { return u64x ((a.s0 ^ b.s0), (a.s1 ^ b.s1), (a.s2 ^ b.s2), (a.s3 ^ b.s3)); } + +inline __device__ u64x operator | (const u64x a, const u64 b) { return u64x ((a.s0 | b), (a.s1 | b) , (a.s2 | b), (a.s3 | b) ); } +inline __device__ u64x operator | (const u64x a, const u64x b) { return u64x ((a.s0 | b.s0), (a.s1 | b.s1), (a.s2 | b.s2), (a.s3 | b.s3)); } + +inline __device__ u64x operator & (const u64x a, const u64 b) { return u64x ((a.s0 & b), (a.s1 & b) , (a.s2 & b), (a.s3 & b) ); } +inline __device__ u64x operator & (const u64x a, const u64x b) { return u64x ((a.s0 & b.s0), (a.s1 & b.s1), (a.s2 & b.s2), (a.s3 & b.s3)); } + +inline __device__ u64x operator + (const u64x a, const u64 b) { return u64x ((a.s0 + b), (a.s1 + b) , (a.s2 + b), (a.s3 + b) ); } +inline __device__ u64x operator + (const u64x 
a, const u64x b) { return u64x ((a.s0 + b.s0), (a.s1 + b.s1), (a.s2 + b.s2), (a.s3 + b.s3)); } + +inline __device__ u64x operator - (const u64x a, const u64 b) { return u64x ((a.s0 - b), (a.s1 - b) , (a.s2 - b), (a.s3 - b) ); } +inline __device__ u64x operator - (const u64x a, const u64x b) { return u64x ((a.s0 - b.s0), (a.s1 - b.s1), (a.s2 - b.s2), (a.s3 - b.s3)); } + +inline __device__ u64x operator * (const u64x a, const u64 b) { return u64x ((a.s0 * b), (a.s1 * b) , (a.s2 * b), (a.s3 * b) ); } +inline __device__ u64x operator * (const u64x a, const u64x b) { return u64x ((a.s0 * b.s0), (a.s1 * b.s1), (a.s2 * b.s2), (a.s3 * b.s3)); } + +inline __device__ u64x operator % (const u64x a, const u64 b) { return u64x ((a.s0 % b), (a.s1 % b) , (a.s2 % b), (a.s3 % b) ); } /* divisor is u64 (as in the VECT_SIZE 2 and 8 variants) so a 64-bit modulus is not narrowed to 32 bits */ +inline __device__ u64x operator % (const u64x a, const u64x b) { return u64x ((a.s0 % b.s0), (a.s1 % b.s1), (a.s2 % b.s2), (a.s3 % b.s3)); } + +inline __device__ u64x operator ~ (const u64x a) { return u64x (~a.s0, ~a.s1, ~a.s2, ~a.s3); } + +#endif + +#if VECT_SIZE == 8 + +struct u8x +{ + u8 s0; + u8 s1; + u8 s2; + u8 s3; + u8 s4; + u8 s5; + u8 s6; + u8 s7; + + inline __device__ u8x (const u8 a, const u8 b, const u8 c, const u8 d, const u8 e, const u8 f, const u8 g, const u8 h) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h) { } + inline __device__ u8x (const u8 a) : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a) { } + + inline __device__ u8x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0) { } + inline __device__ ~u8x (void) { } +}; + +struct u16x +{ + u16 s0; + u16 s1; + u16 s2; + u16 s3; + u16 s4; + u16 s5; + u16 s6; + u16 s7; + + inline __device__ u16x (const u16 a, const u16 b, const u16 c, const u16 d, const u16 e, const u16 f, const u16 g, const u16 h) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h) { } + inline __device__ u16x (const u16 a) : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a) { } + + inline __device__ u16x (void) : s0(0), s1(0), 
s2(0), s3(0), s4(0), s5(0), s6(0), s7(0) { } + inline __device__ ~u16x (void) { } +}; + +struct u32x +{ + u32 s0; + u32 s1; + u32 s2; + u32 s3; + u32 s4; + u32 s5; + u32 s6; + u32 s7; + + inline __device__ u32x (const u32 a, const u32 b, const u32 c, const u32 d, const u32 e, const u32 f, const u32 g, const u32 h) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h) { } + inline __device__ u32x (const u32 a) : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a) { } + + inline __device__ u32x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0) { } + inline __device__ ~u32x (void) { } +}; + +struct u64x +{ + u64 s0; + u64 s1; + u64 s2; + u64 s3; + u64 s4; + u64 s5; + u64 s6; + u64 s7; + + inline __device__ u64x (const u64 a, const u64 b, const u64 c, const u64 d, const u64 e, const u64 f, const u64 g, const u64 h) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h) { } + inline __device__ u64x (const u64 a) : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a) { } + + inline __device__ u64x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0) { } + inline __device__ ~u64x (void) { } +}; + +inline __device__ bool operator != (const u32x a, const u32 b) { return ((a.s0 != b) && (a.s1 != b) && (a.s2 != b) && (a.s3 != b) && (a.s4 != b) && (a.s5 != b) && (a.s6 != b) && (a.s7 != b) ); } +inline __device__ bool operator != (const u32x a, const u32x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3) && (a.s4 != b.s4) && (a.s5 != b.s5) && (a.s6 != b.s6) && (a.s7 != b.s7)); } + +inline __device__ void operator ^= (u32x &a, const u32 b) { a.s0 ^= b; a.s1 ^= b; a.s2 ^= b; a.s3 ^= b; a.s4 ^= b; a.s5 ^= b; a.s6 ^= b; a.s7 ^= b; } +inline __device__ void operator ^= (u32x &a, const u32x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3; a.s4 ^= b.s4; a.s5 ^= b.s5; a.s6 ^= b.s6; a.s7 ^= b.s7; } + +inline __device__ void operator |= (u32x &a, const u32 b) { a.s0 |= b; a.s1 |= b; a.s2 |= b; a.s3 |= b; a.s4 
|= b; a.s5 |= b; a.s6 |= b; a.s7 |= b; } +inline __device__ void operator |= (u32x &a, const u32x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3; a.s4 |= b.s4; a.s5 |= b.s5; a.s6 |= b.s6; a.s7 |= b.s7; } + +inline __device__ void operator &= (u32x &a, const u32 b) { a.s0 &= b; a.s1 &= b; a.s2 &= b; a.s3 &= b; a.s4 &= b; a.s5 &= b; a.s6 &= b; a.s7 &= b; } +inline __device__ void operator &= (u32x &a, const u32x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3; a.s4 &= b.s4; a.s5 &= b.s5; a.s6 &= b.s6; a.s7 &= b.s7; } + +inline __device__ void operator += (u32x &a, const u32 b) { a.s0 += b; a.s1 += b; a.s2 += b; a.s3 += b; a.s4 += b; a.s5 += b; a.s6 += b; a.s7 += b; } +inline __device__ void operator += (u32x &a, const u32x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3; a.s4 += b.s4; a.s5 += b.s5; a.s6 += b.s6; a.s7 += b.s7; } + +inline __device__ void operator -= (u32x &a, const u32 b) { a.s0 -= b; a.s1 -= b; a.s2 -= b; a.s3 -= b; a.s4 -= b; a.s5 -= b; a.s6 -= b; a.s7 -= b; } +inline __device__ void operator -= (u32x &a, const u32x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3; a.s4 -= b.s4; a.s5 -= b.s5; a.s6 -= b.s6; a.s7 -= b.s7; } + +inline __device__ void operator *= (u32x &a, const u32 b) { a.s0 *= b; a.s1 *= b; a.s2 *= b; a.s3 *= b; a.s4 *= b; a.s5 *= b; a.s6 *= b; a.s7 *= b; } +inline __device__ void operator *= (u32x &a, const u32x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3; a.s4 *= b.s4; a.s5 *= b.s5; a.s6 *= b.s6; a.s7 *= b.s7; } + +inline __device__ void operator >>= (u32x &a, const u32 b) { a.s0 >>= b; a.s1 >>= b; a.s2 >>= b; a.s3 >>= b; a.s4 >>= b; a.s5 >>= b; a.s6 >>= b; a.s7 >>= b; } +inline __device__ void operator >>= (u32x &a, const u32x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3; a.s4 >>= b.s4; a.s5 >>= b.s5; a.s6 >>= b.s6; a.s7 >>= b.s7; } + +inline __device__ void operator <<= (u32x &a, const u32 b) { a.s0 <<= b; a.s1 <<= b; a.s2 <<= b; a.s3 <<= b; a.s4 
<<= b; a.s5 <<= b; a.s6 <<= b; a.s7 <<= b; } +inline __device__ void operator <<= (u32x &a, const u32x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3; a.s4 <<= b.s4; a.s5 <<= b.s5; a.s6 <<= b.s6; a.s7 <<= b.s7; } + +inline __device__ u32x operator << (const u32x a, const u32 b) { return u32x ((a.s0 << b), (a.s1 << b) , (a.s2 << b), (a.s3 << b) , (a.s4 << b), (a.s5 << b) , (a.s6 << b), (a.s7 << b) ); } +inline __device__ u32x operator << (const u32x a, const u32x b) { return u32x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3), (a.s4 << b.s4), (a.s5 << b.s5), (a.s6 << b.s6), (a.s7 << b.s7)); } + +inline __device__ u32x operator >> (const u32x a, const u32 b) { return u32x ((a.s0 >> b), (a.s1 >> b) , (a.s2 >> b), (a.s3 >> b) , (a.s4 >> b), (a.s5 >> b) , (a.s6 >> b), (a.s7 >> b) ); } +inline __device__ u32x operator >> (const u32x a, const u32x b) { return u32x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3), (a.s4 >> b.s4), (a.s5 >> b.s5), (a.s6 >> b.s6), (a.s7 >> b.s7)); } + +inline __device__ u32x operator ^ (const u32x a, const u32 b) { return u32x ((a.s0 ^ b), (a.s1 ^ b) , (a.s2 ^ b), (a.s3 ^ b) , (a.s4 ^ b), (a.s5 ^ b) , (a.s6 ^ b), (a.s7 ^ b) ); } +inline __device__ u32x operator ^ (const u32x a, const u32x b) { return u32x ((a.s0 ^ b.s0), (a.s1 ^ b.s1), (a.s2 ^ b.s2), (a.s3 ^ b.s3), (a.s4 ^ b.s4), (a.s5 ^ b.s5), (a.s6 ^ b.s6), (a.s7 ^ b.s7)); } + +inline __device__ u32x operator | (const u32x a, const u32 b) { return u32x ((a.s0 | b), (a.s1 | b) , (a.s2 | b), (a.s3 | b) , (a.s4 | b), (a.s5 | b) , (a.s6 | b), (a.s7 | b) ); } +inline __device__ u32x operator | (const u32x a, const u32x b) { return u32x ((a.s0 | b.s0), (a.s1 | b.s1), (a.s2 | b.s2), (a.s3 | b.s3), (a.s4 | b.s4), (a.s5 | b.s5), (a.s6 | b.s6), (a.s7 | b.s7)); } + +inline __device__ u32x operator & (const u32x a, const u32 b) { return u32x ((a.s0 & b), (a.s1 & b) , (a.s2 & b), (a.s3 & b) , (a.s4 & b), (a.s5 & b) , (a.s6 & b), (a.s7 & b) ); } +inline 
__device__ u32x operator & (const u32x a, const u32x b) { return u32x ((a.s0 & b.s0), (a.s1 & b.s1), (a.s2 & b.s2), (a.s3 & b.s3), (a.s4 & b.s4), (a.s5 & b.s5), (a.s6 & b.s6), (a.s7 & b.s7)); } + +inline __device__ u32x operator + (const u32x a, const u32 b) { return u32x ((a.s0 + b), (a.s1 + b) , (a.s2 + b), (a.s3 + b) , (a.s4 + b), (a.s5 + b) , (a.s6 + b), (a.s7 + b) ); } +inline __device__ u32x operator + (const u32x a, const u32x b) { return u32x ((a.s0 + b.s0), (a.s1 + b.s1), (a.s2 + b.s2), (a.s3 + b.s3), (a.s4 + b.s4), (a.s5 + b.s5), (a.s6 + b.s6), (a.s7 + b.s7)); } + +inline __device__ u32x operator - (const u32x a, const u32 b) { return u32x ((a.s0 - b), (a.s1 - b) , (a.s2 - b), (a.s3 - b) , (a.s4 - b), (a.s5 - b) , (a.s6 - b), (a.s7 - b) ); } +inline __device__ u32x operator - (const u32x a, const u32x b) { return u32x ((a.s0 - b.s0), (a.s1 - b.s1), (a.s2 - b.s2), (a.s3 - b.s3), (a.s4 - b.s4), (a.s5 - b.s5), (a.s6 - b.s6), (a.s7 - b.s7)); } + +inline __device__ u32x operator * (const u32x a, const u32 b) { return u32x ((a.s0 * b), (a.s1 * b) , (a.s2 * b), (a.s3 * b) , (a.s4 * b), (a.s5 * b) , (a.s6 * b), (a.s7 * b) ); } +inline __device__ u32x operator * (const u32x a, const u32x b) { return u32x ((a.s0 * b.s0), (a.s1 * b.s1), (a.s2 * b.s2), (a.s3 * b.s3), (a.s4 * b.s4), (a.s5 * b.s5), (a.s6 * b.s6), (a.s7 * b.s7)); } + +inline __device__ u32x operator % (const u32x a, const u32 b) { return u32x ((a.s0 % b), (a.s1 % b) , (a.s2 % b), (a.s3 % b) , (a.s4 % b), (a.s5 % b) , (a.s6 % b), (a.s7 % b) ); } +inline __device__ u32x operator % (const u32x a, const u32x b) { return u32x ((a.s0 % b.s0), (a.s1 % b.s1), (a.s2 % b.s2), (a.s3 % b.s3), (a.s4 % b.s4), (a.s5 % b.s5), (a.s6 % b.s6), (a.s7 % b.s7)); } + +inline __device__ u32x operator ~ (const u32x a) { return u32x (~a.s0, ~a.s1, ~a.s2, ~a.s3, ~a.s4, ~a.s5, ~a.s6, ~a.s7); } + +inline __device__ bool operator != (const u64x a, const u64 b) { return ((a.s0 != b) && (a.s1 != b) && (a.s2 != b) && (a.s3 != b) && 
(a.s4 != b) && (a.s5 != b) && (a.s6 != b) && (a.s7 != b) ); } +inline __device__ bool operator != (const u64x a, const u64x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3) && (a.s4 != b.s4) && (a.s5 != b.s5) && (a.s6 != b.s6) && (a.s7 != b.s7)); } + +inline __device__ void operator ^= (u64x &a, const u64 b) { a.s0 ^= b; a.s1 ^= b; a.s2 ^= b; a.s3 ^= b; a.s4 ^= b; a.s5 ^= b; a.s6 ^= b; a.s7 ^= b; } +inline __device__ void operator ^= (u64x &a, const u64x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3; a.s4 ^= b.s4; a.s5 ^= b.s5; a.s6 ^= b.s6; a.s7 ^= b.s7; } + +inline __device__ void operator |= (u64x &a, const u64 b) { a.s0 |= b; a.s1 |= b; a.s2 |= b; a.s3 |= b; a.s4 |= b; a.s5 |= b; a.s6 |= b; a.s7 |= b; } +inline __device__ void operator |= (u64x &a, const u64x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3; a.s4 |= b.s4; a.s5 |= b.s5; a.s6 |= b.s6; a.s7 |= b.s7; } + +inline __device__ void operator &= (u64x &a, const u64 b) { a.s0 &= b; a.s1 &= b; a.s2 &= b; a.s3 &= b; a.s4 &= b; a.s5 &= b; a.s6 &= b; a.s7 &= b; } +inline __device__ void operator &= (u64x &a, const u64x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3; a.s4 &= b.s4; a.s5 &= b.s5; a.s6 &= b.s6; a.s7 &= b.s7; } + +inline __device__ void operator += (u64x &a, const u64 b) { a.s0 += b; a.s1 += b; a.s2 += b; a.s3 += b; a.s4 += b; a.s5 += b; a.s6 += b; a.s7 += b; } +inline __device__ void operator += (u64x &a, const u64x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3; a.s4 += b.s4; a.s5 += b.s5; a.s6 += b.s6; a.s7 += b.s7; } + +inline __device__ void operator -= (u64x &a, const u64 b) { a.s0 -= b; a.s1 -= b; a.s2 -= b; a.s3 -= b; a.s4 -= b; a.s5 -= b; a.s6 -= b; a.s7 -= b; } +inline __device__ void operator -= (u64x &a, const u64x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3; a.s4 -= b.s4; a.s5 -= b.s5; a.s6 -= b.s6; a.s7 -= b.s7; } + +inline __device__ void operator *= (u64x &a, const u64 b) { a.s0 *= 
b; a.s1 *= b; a.s2 *= b; a.s3 *= b; a.s4 *= b; a.s5 *= b; a.s6 *= b; a.s7 *= b; } +inline __device__ void operator *= (u64x &a, const u64x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3; a.s4 *= b.s4; a.s5 *= b.s5; a.s6 *= b.s6; a.s7 *= b.s7; } + +inline __device__ void operator >>= (u64x &a, const u64 b) { a.s0 >>= b; a.s1 >>= b; a.s2 >>= b; a.s3 >>= b; a.s4 >>= b; a.s5 >>= b; a.s6 >>= b; a.s7 >>= b; } +inline __device__ void operator >>= (u64x &a, const u64x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3; a.s4 >>= b.s4; a.s5 >>= b.s5; a.s6 >>= b.s6; a.s7 >>= b.s7; } + +inline __device__ void operator <<= (u64x &a, const u64 b) { a.s0 <<= b; a.s1 <<= b; a.s2 <<= b; a.s3 <<= b; a.s4 <<= b; a.s5 <<= b; a.s6 <<= b; a.s7 <<= b; } +inline __device__ void operator <<= (u64x &a, const u64x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3; a.s4 <<= b.s4; a.s5 <<= b.s5; a.s6 <<= b.s6; a.s7 <<= b.s7; } + +inline __device__ u64x operator << (const u64x a, const u64 b) { return u64x ((a.s0 << b), (a.s1 << b) , (a.s2 << b), (a.s3 << b) , (a.s4 << b), (a.s5 << b) , (a.s6 << b), (a.s7 << b) ); } +inline __device__ u64x operator << (const u64x a, const u64x b) { return u64x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3), (a.s4 << b.s4), (a.s5 << b.s5), (a.s6 << b.s6), (a.s7 << b.s7)); } + +inline __device__ u64x operator >> (const u64x a, const u64 b) { return u64x ((a.s0 >> b), (a.s1 >> b) , (a.s2 >> b), (a.s3 >> b) , (a.s4 >> b), (a.s5 >> b) , (a.s6 >> b), (a.s7 >> b) ); } +inline __device__ u64x operator >> (const u64x a, const u64x b) { return u64x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3), (a.s4 >> b.s4), (a.s5 >> b.s5), (a.s6 >> b.s6), (a.s7 >> b.s7)); } + +inline __device__ u64x operator ^ (const u64x a, const u64 b) { return u64x ((a.s0 ^ b), (a.s1 ^ b) , (a.s2 ^ b), (a.s3 ^ b) , (a.s4 ^ b), (a.s5 ^ b) , (a.s6 ^ b), (a.s7 ^ b) ); } +inline __device__ u64x operator ^ (const u64x a, const 
u64x b) { return u64x ((a.s0 ^ b.s0), (a.s1 ^ b.s1), (a.s2 ^ b.s2), (a.s3 ^ b.s3), (a.s4 ^ b.s4), (a.s5 ^ b.s5), (a.s6 ^ b.s6), (a.s7 ^ b.s7)); } + +inline __device__ u64x operator | (const u64x a, const u64 b) { return u64x ((a.s0 | b), (a.s1 | b) , (a.s2 | b), (a.s3 | b) , (a.s4 | b), (a.s5 | b) , (a.s6 | b), (a.s7 | b) ); } +inline __device__ u64x operator | (const u64x a, const u64x b) { return u64x ((a.s0 | b.s0), (a.s1 | b.s1), (a.s2 | b.s2), (a.s3 | b.s3), (a.s4 | b.s4), (a.s5 | b.s5), (a.s6 | b.s6), (a.s7 | b.s7)); } + +inline __device__ u64x operator & (const u64x a, const u64 b) { return u64x ((a.s0 & b), (a.s1 & b) , (a.s2 & b), (a.s3 & b) , (a.s4 & b), (a.s5 & b) , (a.s6 & b), (a.s7 & b) ); } +inline __device__ u64x operator & (const u64x a, const u64x b) { return u64x ((a.s0 & b.s0), (a.s1 & b.s1), (a.s2 & b.s2), (a.s3 & b.s3), (a.s4 & b.s4), (a.s5 & b.s5), (a.s6 & b.s6), (a.s7 & b.s7)); } + +inline __device__ u64x operator + (const u64x a, const u64 b) { return u64x ((a.s0 + b), (a.s1 + b) , (a.s2 + b), (a.s3 + b) , (a.s4 + b), (a.s5 + b) , (a.s6 + b), (a.s7 + b) ); } +inline __device__ u64x operator + (const u64x a, const u64x b) { return u64x ((a.s0 + b.s0), (a.s1 + b.s1), (a.s2 + b.s2), (a.s3 + b.s3), (a.s4 + b.s4), (a.s5 + b.s5), (a.s6 + b.s6), (a.s7 + b.s7)); } + +inline __device__ u64x operator - (const u64x a, const u64 b) { return u64x ((a.s0 - b), (a.s1 - b) , (a.s2 - b), (a.s3 - b) , (a.s4 - b), (a.s5 - b) , (a.s6 - b), (a.s7 - b) ); } +inline __device__ u64x operator - (const u64x a, const u64x b) { return u64x ((a.s0 - b.s0), (a.s1 - b.s1), (a.s2 - b.s2), (a.s3 - b.s3), (a.s4 - b.s4), (a.s5 - b.s5), (a.s6 - b.s6), (a.s7 - b.s7)); } + +inline __device__ u64x operator * (const u64x a, const u64 b) { return u64x ((a.s0 * b), (a.s1 * b) , (a.s2 * b), (a.s3 * b) , (a.s4 * b), (a.s5 * b) , (a.s6 * b), (a.s7 * b) ); } +inline __device__ u64x operator * (const u64x a, const u64x b) { return u64x ((a.s0 * b.s0), (a.s1 * b.s1), (a.s2 * b.s2), (a.s3 
* b.s3), (a.s4 * b.s4), (a.s5 * b.s5), (a.s6 * b.s6), (a.s7 * b.s7)); } + +inline __device__ u64x operator % (const u64x a, const u64 b) { return u64x ((a.s0 % b), (a.s1 % b) , (a.s2 % b), (a.s3 % b) , (a.s4 % b), (a.s5 % b) , (a.s6 % b), (a.s7 % b) ); } +inline __device__ u64x operator % (const u64x a, const u64x b) { return u64x ((a.s0 % b.s0), (a.s1 % b.s1), (a.s2 % b.s2), (a.s3 % b.s3), (a.s4 % b.s4), (a.s5 % b.s5), (a.s6 % b.s6), (a.s7 % b.s7)); } + +inline __device__ u64x operator ~ (const u64x a) { return u64x (~a.s0, ~a.s1, ~a.s2, ~a.s3, ~a.s4, ~a.s5, ~a.s6, ~a.s7); } + +#endif + +#if VECT_SIZE == 16 + +struct u8x +{ + u8 s0; + u8 s1; + u8 s2; + u8 s3; + u8 s4; + u8 s5; + u8 s6; + u8 s7; + u8 s8; + u8 s9; + u8 sa; + u8 sb; + u8 sc; + u8 sd; + u8 se; + u8 sf; + + inline __device__ u8x (const u8 a, const u8 b, const u8 c, const u8 d, const u8 e, const u8 f, const u8 g, const u8 h, const u8 i, const u8 j, const u8 k, const u8 l, const u8 m, const u8 n, const u8 o, const u8 p) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h), s8(i), s9(j), sa(k), sb(l), sc(m), sd(n), se(o), sf(p) { } + inline __device__ u8x (const u8 a) : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a), s8(a), s9(a), sa(a), sb(a), sc(a), sd(a), se(a), sf(a) { } + + inline __device__ u8x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0), s8(0), s9(0), sa(0), sb(0), sc(0), sd(0), se(0), sf(0) { } + inline __device__ ~u8x (void) { } +}; + +struct u16x +{ + u16 s0; + u16 s1; + u16 s2; + u16 s3; + u16 s4; + u16 s5; + u16 s6; + u16 s7; + u16 s8; + u16 s9; + u16 sa; + u16 sb; + u16 sc; + u16 sd; + u16 se; + u16 sf; + + inline __device__ u16x (const u16 a, const u16 b, const u16 c, const u16 d, const u16 e, const u16 f, const u16 g, const u16 h, const u16 i, const u16 j, const u16 k, const u16 l, const u16 m, const u16 n, const u16 o, const u16 p) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h), s8(i), s9(j), sa(k), sb(l), sc(m), sd(n), se(o), sf(p) { } + inline 
__device__ u16x (const u16 a) : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a), s8(a), s9(a), sa(a), sb(a), sc(a), sd(a), se(a), sf(a) { } + + inline __device__ u16x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0), s8(0), s9(0), sa(0), sb(0), sc(0), sd(0), se(0), sf(0){ } + inline __device__ ~u16x (void) { } +}; + +struct u32x +{ + u32 s0; + u32 s1; + u32 s2; + u32 s3; + u32 s4; + u32 s5; + u32 s6; + u32 s7; + u32 s8; + u32 s9; + u32 sa; + u32 sb; + u32 sc; + u32 sd; + u32 se; + u32 sf; + + inline __device__ u32x (const u32 a, const u32 b, const u32 c, const u32 d, const u32 e, const u32 f, const u32 g, const u32 h, const u32 i, const u32 j, const u32 k, const u32 l, const u32 m, const u32 n, const u32 o, const u32 p) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h), s8(i), s9(j), sa(k), sb(l), sc(m), sd(n), se(o), sf(p) { } + inline __device__ u32x (const u32 a) : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a), s8(a), s9(a), sa(a), sb(a), sc(a), sd(a), se(a), sf(a) { } + + inline __device__ u32x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0), s8(0), s9(0), sa(0), sb(0), sc(0), sd(0), se(0), sf(0){ } + inline __device__ ~u32x (void) { } +}; + +struct u64x +{ + u64 s0; + u64 s1; + u64 s2; + u64 s3; + u64 s4; + u64 s5; + u64 s6; + u64 s7; + u64 s8; + u64 s9; + u64 sa; + u64 sb; + u64 sc; + u64 sd; + u64 se; + u64 sf; + + inline __device__ u64x (const u64 a, const u64 b, const u64 c, const u64 d, const u64 e, const u64 f, const u64 g, const u64 h, const u64 i, const u64 j, const u64 k, const u64 l, const u64 m, const u64 n, const u64 o, const u64 p) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h), s8(i), s9(j), sa(k), sb(l), sc(m), sd(n), se(o), sf(p) { } + inline __device__ u64x (const u64 a) : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a), s8(a), s9(a), sa(a), sb(a), sc(a), sd(a), se(a), sf(a) { } + + inline __device__ u64x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0), 
s8(0), s9(0), sa(0), sb(0), sc(0), sd(0), se(0), sf(0) { } + inline __device__ ~u64x (void) { } +}; + +inline __device__ bool operator != (const u32x a, const u32 b) { return ((a.s0 != b) && (a.s1 != b) && (a.s2 != b) && (a.s3 != b) && (a.s4 != b) && (a.s5 != b) && (a.s6 != b) && (a.s7 != b) && (a.s8 != b) && (a.s9 != b) && (a.sa != b) && (a.sb != b) && (a.sc != b) && (a.sd != b) && (a.se != b) && (a.sf != b) ); } +inline __device__ bool operator != (const u32x a, const u32x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3) && (a.s4 != b.s4) && (a.s5 != b.s5) && (a.s6 != b.s6) && (a.s7 != b.s7) && (a.s8 != b.s8) && (a.s9 != b.s9) && (a.sa != b.sa) && (a.sb != b.sb) && (a.sc != b.sc) && (a.sd != b.sd) && (a.se != b.se) && (a.sf != b.sf)); } + +inline __device__ void operator ^= (u32x &a, const u32 b) { a.s0 ^= b; a.s1 ^= b; a.s2 ^= b; a.s3 ^= b; a.s4 ^= b; a.s5 ^= b; a.s6 ^= b; a.s7 ^= b; a.s8 ^= b; a.s9 ^= b; a.sa ^= b; a.sb ^= b; a.sc ^= b; a.sd ^= b; a.se ^= b; a.sf ^= b; } +inline __device__ void operator ^= (u32x &a, const u32x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3; a.s4 ^= b.s4; a.s5 ^= b.s5; a.s6 ^= b.s6; a.s7 ^= b.s7; a.s8 ^= b.s8; a.s9 ^= b.s9; a.sa ^= b.sa; a.sb ^= b.sb; a.sc ^= b.sc; a.sd ^= b.sd; a.se ^= b.se; a.sf ^= b.sf; } + +inline __device__ void operator |= (u32x &a, const u32 b) { a.s0 |= b; a.s1 |= b; a.s2 |= b; a.s3 |= b; a.s4 |= b; a.s5 |= b; a.s6 |= b; a.s7 |= b; a.s8 |= b; a.s9 |= b; a.sa |= b; a.sb |= b; a.sc |= b; a.sd |= b; a.se |= b; a.sf |= b; } +inline __device__ void operator |= (u32x &a, const u32x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3; a.s4 |= b.s4; a.s5 |= b.s5; a.s6 |= b.s6; a.s7 |= b.s7; a.s8 |= b.s8; a.s9 |= b.s9; a.sa |= b.sa; a.sb |= b.sb; a.sc |= b.sc; a.sd |= b.sd; a.se |= b.se; a.sf |= b.sf; } + +inline __device__ void operator &= (u32x &a, const u32 b) { a.s0 &= b; a.s1 &= b; a.s2 &= b; a.s3 &= b; a.s4 &= b; a.s5 &= b; a.s6 &= b; a.s7 &= b; a.s8 &= 
b; a.s9 &= b; a.sa &= b; a.sb &= b; a.sc &= b; a.sd &= b; a.se &= b; a.sf &= b; } +inline __device__ void operator &= (u32x &a, const u32x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3; a.s4 &= b.s4; a.s5 &= b.s5; a.s6 &= b.s6; a.s7 &= b.s7; a.s8 &= b.s8; a.s9 &= b.s9; a.sa &= b.sa; a.sb &= b.sb; a.sc &= b.sc; a.sd &= b.sd; a.se &= b.se; a.sf &= b.sf; } + +inline __device__ void operator += (u32x &a, const u32 b) { a.s0 += b; a.s1 += b; a.s2 += b; a.s3 += b; a.s4 += b; a.s5 += b; a.s6 += b; a.s7 += b; a.s8 += b; a.s9 += b; a.sa += b; a.sb += b; a.sc += b; a.sd += b; a.se += b; a.sf += b; } +inline __device__ void operator += (u32x &a, const u32x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3; a.s4 += b.s4; a.s5 += b.s5; a.s6 += b.s6; a.s7 += b.s7; a.s8 += b.s8; a.s9 += b.s9; a.sa += b.sa; a.sb += b.sb; a.sc += b.sc; a.sd += b.sd; a.se += b.se; a.sf += b.sf; } + +inline __device__ void operator -= (u32x &a, const u32 b) { a.s0 -= b; a.s1 -= b; a.s2 -= b; a.s3 -= b; a.s4 -= b; a.s5 -= b; a.s6 -= b; a.s7 -= b; a.s8 -= b; a.s9 -= b; a.sa -= b; a.sb -= b; a.sc -= b; a.sd -= b; a.se -= b; a.sf -= b; } +inline __device__ void operator -= (u32x &a, const u32x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3; a.s4 -= b.s4; a.s5 -= b.s5; a.s6 -= b.s6; a.s7 -= b.s7; a.s8 -= b.s8; a.s9 -= b.s9; a.sa -= b.sa; a.sb -= b.sb; a.sc -= b.sc; a.sd -= b.sd; a.se -= b.se; a.sf -= b.sf; } + +inline __device__ void operator *= (u32x &a, const u32 b) { a.s0 *= b; a.s1 *= b; a.s2 *= b; a.s3 *= b; a.s4 *= b; a.s5 *= b; a.s6 *= b; a.s7 *= b; a.s8 *= b; a.s9 *= b; a.sa *= b; a.sb *= b; a.sc *= b; a.sd *= b; a.se *= b; a.sf *= b; } +inline __device__ void operator *= (u32x &a, const u32x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3; a.s4 *= b.s4; a.s5 *= b.s5; a.s6 *= b.s6; a.s7 *= b.s7; a.s8 *= b.s8; a.s9 *= b.s9; a.sa *= b.sa; a.sb *= b.sb; a.sc *= b.sc; a.sd *= b.sd; a.se *= b.se; a.sf *= b.sf; } + +inline __device__ void operator >>= 
(u32x &a, const u32 b) { a.s0 >>= b; a.s1 >>= b; a.s2 >>= b; a.s3 >>= b; a.s4 >>= b; a.s5 >>= b; a.s6 >>= b; a.s7 >>= b; a.s8 >>= b; a.s9 >>= b; a.sa >>= b; a.sb >>= b; a.sc >>= b; a.sd >>= b; a.se >>= b; a.sf >>= b; } +inline __device__ void operator >>= (u32x &a, const u32x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3; a.s4 >>= b.s4; a.s5 >>= b.s5; a.s6 >>= b.s6; a.s7 >>= b.s7; a.s8 >>= b.s8; a.s9 >>= b.s9; a.sa >>= b.sa; a.sb >>= b.sb; a.sc >>= b.sc; a.sd >>= b.sd; a.se >>= b.se; a.sf >>= b.sf; } + +inline __device__ void operator <<= (u32x &a, const u32 b) { a.s0 <<= b; a.s1 <<= b; a.s2 <<= b; a.s3 <<= b; a.s4 <<= b; a.s5 <<= b; a.s6 <<= b; a.s7 <<= b; a.s8 <<= b; a.s9 <<= b; a.sa <<= b; a.sb <<= b; a.sc <<= b; a.sd <<= b; a.se <<= b; a.sf <<= b; } +inline __device__ void operator <<= (u32x &a, const u32x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3; a.s4 <<= b.s4; a.s5 <<= b.s5; a.s6 <<= b.s6; a.s7 <<= b.s7; a.s8 <<= b.s8; a.s9 <<= b.s9; a.sa <<= b.sa; a.sb <<= b.sb; a.sc <<= b.sc; a.sd <<= b.sd; a.se <<= b.se; a.sf <<= b.sf; } + +inline __device__ u32x operator << (const u32x a, const u32 b) { return u32x ((a.s0 << b), (a.s1 << b) , (a.s2 << b), (a.s3 << b) , (a.s4 << b), (a.s5 << b) , (a.s6 << b), (a.s7 << b), (a.s8 << b), (a.s9 << b) , (a.sa << b), (a.sb << b) , (a.sc << b), (a.sd << b) , (a.se << b), (a.sf << b) ); } +inline __device__ u32x operator << (const u32x a, const u32x b) { return u32x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3), (a.s4 << b.s4), (a.s5 << b.s5), (a.s6 << b.s6), (a.s7 << b.s7), (a.s8 << b.s8), (a.s9 << b.s9), (a.sa << b.sa), (a.sb << b.sb), (a.sc << b.sc), (a.sd << b.sd), (a.se << b.se), (a.sf << b.sf)); } + +inline __device__ u32x operator >> (const u32x a, const u32 b) { return u32x ((a.s0 >> b), (a.s1 >> b) , (a.s2 >> b), (a.s3 >> b) , (a.s4 >> b), (a.s5 >> b) , (a.s6 >> b), (a.s7 >> b), (a.s8 >> b), (a.s9 >> b) , (a.sa >> b), (a.sb >> b) , (a.sc >> b), (a.sd >> b) , (a.se 
>> b), (a.sf >> b) ); } +inline __device__ u32x operator >> (const u32x a, const u32x b) { return u32x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3), (a.s4 >> b.s4), (a.s5 >> b.s5), (a.s6 >> b.s6), (a.s7 >> b.s7), (a.s8 >> b.s8), (a.s9 >> b.s9), (a.sa >> b.sa), (a.sb >> b.sb), (a.sc >> b.sc), (a.sd >> b.sd), (a.se >> b.se), (a.sf >> b.sf)); } + +inline __device__ u32x operator ^ (const u32x a, const u32 b) { return u32x ((a.s0 ^ b), (a.s1 ^ b) , (a.s2 ^ b), (a.s3 ^ b) , (a.s4 ^ b), (a.s5 ^ b) , (a.s6 ^ b), (a.s7 ^ b), (a.s8 ^ b), (a.s9 ^ b) , (a.sa ^ b), (a.sb ^ b) , (a.sc ^ b), (a.sd ^ b) , (a.se ^ b), (a.sf ^ b) ); } +inline __device__ u32x operator ^ (const u32x a, const u32x b) { return u32x ((a.s0 ^ b.s0), (a.s1 ^ b.s1), (a.s2 ^ b.s2), (a.s3 ^ b.s3), (a.s4 ^ b.s4), (a.s5 ^ b.s5), (a.s6 ^ b.s6), (a.s7 ^ b.s7), (a.s8 ^ b.s8), (a.s9 ^ b.s9), (a.sa ^ b.sa), (a.sb ^ b.sb), (a.sc ^ b.sc), (a.sd ^ b.sd), (a.se ^ b.se), (a.sf ^ b.sf)); } + +inline __device__ u32x operator | (const u32x a, const u32 b) { return u32x ((a.s0 | b), (a.s1 | b) , (a.s2 | b), (a.s3 | b) , (a.s4 | b), (a.s5 | b) , (a.s6 | b), (a.s7 | b), (a.s8 | b), (a.s9 | b) , (a.sa | b), (a.sb | b) , (a.sc | b), (a.sd | b) , (a.se | b), (a.sf | b) ); } +inline __device__ u32x operator | (const u32x a, const u32x b) { return u32x ((a.s0 | b.s0), (a.s1 | b.s1), (a.s2 | b.s2), (a.s3 | b.s3), (a.s4 | b.s4), (a.s5 | b.s5), (a.s6 | b.s6), (a.s7 | b.s7), (a.s8 | b.s8), (a.s9 | b.s9), (a.sa | b.sa), (a.sb | b.sb), (a.sc | b.sc), (a.sd | b.sd), (a.se | b.se), (a.sf | b.sf)); } + +inline __device__ u32x operator & (const u32x a, const u32 b) { return u32x ((a.s0 & b), (a.s1 & b) , (a.s2 & b), (a.s3 & b) , (a.s4 & b), (a.s5 & b) , (a.s6 & b), (a.s7 & b), (a.s8 & b), (a.s9 & b) , (a.sa & b), (a.sb & b) , (a.sc & b), (a.sd & b) , (a.se & b), (a.sf & b) ); } +inline __device__ u32x operator & (const u32x a, const u32x b) { return u32x ((a.s0 & b.s0), (a.s1 & b.s1), (a.s2 & b.s2), (a.s3 & b.s3), (a.s4 & 
b.s4), (a.s5 & b.s5), (a.s6 & b.s6), (a.s7 & b.s7), (a.s8 & b.s8), (a.s9 & b.s9), (a.sa & b.sa), (a.sb & b.sb), (a.sc & b.sc), (a.sd & b.sd), (a.se & b.se), (a.sf & b.sf)); } + +inline __device__ u32x operator + (const u32x a, const u32 b) { return u32x ((a.s0 + b), (a.s1 + b) , (a.s2 + b), (a.s3 + b) , (a.s4 + b), (a.s5 + b) , (a.s6 + b), (a.s7 + b), (a.s8 + b), (a.s9 + b) , (a.sa + b), (a.sb + b) , (a.sc + b), (a.sd + b) , (a.se + b), (a.sf + b) ); } +inline __device__ u32x operator + (const u32x a, const u32x b) { return u32x ((a.s0 + b.s0), (a.s1 + b.s1), (a.s2 + b.s2), (a.s3 + b.s3), (a.s4 + b.s4), (a.s5 + b.s5), (a.s6 + b.s6), (a.s7 + b.s7), (a.s8 + b.s8), (a.s9 + b.s9), (a.sa + b.sa), (a.sb + b.sb), (a.sc + b.sc), (a.sd + b.sd), (a.se + b.se), (a.sf + b.sf)); } + +inline __device__ u32x operator - (const u32x a, const u32 b) { return u32x ((a.s0 - b), (a.s1 - b) , (a.s2 - b), (a.s3 - b) , (a.s4 - b), (a.s5 - b) , (a.s6 - b), (a.s7 - b), (a.s8 - b), (a.s9 - b) , (a.sa - b), (a.sb - b) , (a.sc - b), (a.sd - b) , (a.se - b), (a.sf - b) ); } +inline __device__ u32x operator - (const u32x a, const u32x b) { return u32x ((a.s0 - b.s0), (a.s1 - b.s1), (a.s2 - b.s2), (a.s3 - b.s3), (a.s4 - b.s4), (a.s5 - b.s5), (a.s6 - b.s6), (a.s7 - b.s7), (a.s8 - b.s8), (a.s9 - b.s9), (a.sa - b.sa), (a.sb - b.sb), (a.sc - b.sc), (a.sd - b.sd), (a.se - b.se), (a.sf - b.sf)); } + +inline __device__ u32x operator * (const u32x a, const u32 b) { return u32x ((a.s0 * b), (a.s1 * b) , (a.s2 * b), (a.s3 * b) , (a.s4 * b), (a.s5 * b) , (a.s6 * b), (a.s7 * b), (a.s8 * b), (a.s9 * b) , (a.sa * b), (a.sb * b) , (a.sc * b), (a.sd * b) , (a.se * b), (a.sf * b) ); } +inline __device__ u32x operator * (const u32x a, const u32x b) { return u32x ((a.s0 * b.s0), (a.s1 * b.s1), (a.s2 * b.s2), (a.s3 * b.s3), (a.s4 * b.s4), (a.s5 * b.s5), (a.s6 * b.s6), (a.s7 * b.s7), (a.s8 * b.s8), (a.s9 * b.s9), (a.sa * b.sa), (a.sb * b.sb), (a.sc * b.sc), (a.sd * b.sd), (a.se * b.se), (a.sf * b.sf)); } + +inline 
__device__ u32x operator % (const u32x a, const u32 b) { return u32x ((a.s0 % b), (a.s1 % b) , (a.s2 % b), (a.s3 % b) , (a.s4 % b), (a.s5 % b) , (a.s6 % b), (a.s7 % b), (a.s8 % b), (a.s9 % b) , (a.sa % b), (a.sb % b) , (a.sc % b), (a.sd % b) , (a.se % b), (a.sf % b) ); } +inline __device__ u32x operator % (const u32x a, const u32x b) { return u32x ((a.s0 % b.s0), (a.s1 % b.s1), (a.s2 % b.s2), (a.s3 % b.s3), (a.s4 % b.s4), (a.s5 % b.s5), (a.s6 % b.s6), (a.s7 % b.s7), (a.s8 % b.s8), (a.s9 % b.s9), (a.sa % b.sa), (a.sb % b.sb), (a.sc % b.sc), (a.sd % b.sd), (a.se % b.se), (a.sf % b.sf)); } + +inline __device__ u32x operator ~ (const u32x a) { return u32x (~a.s0, ~a.s1, ~a.s2, ~a.s3, ~a.s4, ~a.s5, ~a.s6, ~a.s7, ~a.s8, ~a.s9, ~a.sa, ~a.sb, ~a.sc, ~a.sd, ~a.se, ~a.sf); } + +inline __device__ bool operator != (const u64x a, const u64 b) { return ((a.s0 != b) && (a.s1 != b) && (a.s2 != b) && (a.s3 != b) && (a.s4 != b) && (a.s5 != b) && (a.s6 != b) && (a.s7 != b) && (a.s8 != b) && (a.s9 != b) && (a.sa != b) && (a.sb != b) && (a.sc != b) && (a.sd != b) && (a.se != b) && (a.sf != b) ); } +inline __device__ bool operator != (const u64x a, const u64x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3) && (a.s4 != b.s4) && (a.s5 != b.s5) && (a.s6 != b.s6) && (a.s7 != b.s7) && (a.s8 != b.s8) && (a.s9 != b.s9) && (a.sa != b.sa) && (a.sb != b.sb) && (a.sc != b.sc) && (a.sd != b.sd) && (a.se != b.se) && (a.sf != b.sf)); } + +inline __device__ void operator ^= (u64x &a, const u64 b) { a.s0 ^= b; a.s1 ^= b; a.s2 ^= b; a.s3 ^= b; a.s4 ^= b; a.s5 ^= b; a.s6 ^= b; a.s7 ^= b; a.s8 ^= b; a.s9 ^= b; a.sa ^= b; a.sb ^= b; a.sc ^= b; a.sd ^= b; a.se ^= b; a.sf ^= b; } +inline __device__ void operator ^= (u64x &a, const u64x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3; a.s4 ^= b.s4; a.s5 ^= b.s5; a.s6 ^= b.s6; a.s7 ^= b.s7; a.s8 ^= b.s8; a.s9 ^= b.s9; a.sa ^= b.sa; a.sb ^= b.sb; a.sc ^= b.sc; a.sd ^= b.sd; a.se ^= b.se; a.sf ^= b.sf; } + +inline 
__device__ void operator |= (u64x &a, const u64 b) { a.s0 |= b; a.s1 |= b; a.s2 |= b; a.s3 |= b; a.s4 |= b; a.s5 |= b; a.s6 |= b; a.s7 |= b; a.s8 |= b; a.s9 |= b; a.sa |= b; a.sb |= b; a.sc |= b; a.sd |= b; a.se |= b; a.sf |= b; } +inline __device__ void operator |= (u64x &a, const u64x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3; a.s4 |= b.s4; a.s5 |= b.s5; a.s6 |= b.s6; a.s7 |= b.s7; a.s8 |= b.s8; a.s9 |= b.s9; a.sa |= b.sa; a.sb |= b.sb; a.sc |= b.sc; a.sd |= b.sd; a.se |= b.se; a.sf |= b.sf; } + +inline __device__ void operator &= (u64x &a, const u64 b) { a.s0 &= b; a.s1 &= b; a.s2 &= b; a.s3 &= b; a.s4 &= b; a.s5 &= b; a.s6 &= b; a.s7 &= b; a.s8 &= b; a.s9 &= b; a.sa &= b; a.sb &= b; a.sc &= b; a.sd &= b; a.se &= b; a.sf &= b; } +inline __device__ void operator &= (u64x &a, const u64x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3; a.s4 &= b.s4; a.s5 &= b.s5; a.s6 &= b.s6; a.s7 &= b.s7; a.s8 &= b.s8; a.s9 &= b.s9; a.sa &= b.sa; a.sb &= b.sb; a.sc &= b.sc; a.sd &= b.sd; a.se &= b.se; a.sf &= b.sf; } + +inline __device__ void operator += (u64x &a, const u64 b) { a.s0 += b; a.s1 += b; a.s2 += b; a.s3 += b; a.s4 += b; a.s5 += b; a.s6 += b; a.s7 += b; a.s8 += b; a.s9 += b; a.sa += b; a.sb += b; a.sc += b; a.sd += b; a.se += b; a.sf += b; } +inline __device__ void operator += (u64x &a, const u64x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3; a.s4 += b.s4; a.s5 += b.s5; a.s6 += b.s6; a.s7 += b.s7; a.s8 += b.s8; a.s9 += b.s9; a.sa += b.sa; a.sb += b.sb; a.sc += b.sc; a.sd += b.sd; a.se += b.se; a.sf += b.sf; } + +inline __device__ void operator -= (u64x &a, const u64 b) { a.s0 -= b; a.s1 -= b; a.s2 -= b; a.s3 -= b; a.s4 -= b; a.s5 -= b; a.s6 -= b; a.s7 -= b; a.s8 -= b; a.s9 -= b; a.sa -= b; a.sb -= b; a.sc -= b; a.sd -= b; a.se -= b; a.sf -= b; } +inline __device__ void operator -= (u64x &a, const u64x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3; a.s4 -= b.s4; a.s5 -= b.s5; a.s6 -= b.s6; a.s7 -= b.s7; a.s8 -= 
b.s8; a.s9 -= b.s9; a.sa -= b.sa; a.sb -= b.sb; a.sc -= b.sc; a.sd -= b.sd; a.se -= b.se; a.sf -= b.sf; } + +inline __device__ void operator *= (u64x &a, const u64 b) { a.s0 *= b; a.s1 *= b; a.s2 *= b; a.s3 *= b; a.s4 *= b; a.s5 *= b; a.s6 *= b; a.s7 *= b; a.s8 *= b; a.s9 *= b; a.sa *= b; a.sb *= b; a.sc *= b; a.sd *= b; a.se *= b; a.sf *= b; } +inline __device__ void operator *= (u64x &a, const u64x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3; a.s4 *= b.s4; a.s5 *= b.s5; a.s6 *= b.s6; a.s7 *= b.s7; a.s8 *= b.s8; a.s9 *= b.s9; a.sa *= b.sa; a.sb *= b.sb; a.sc *= b.sc; a.sd *= b.sd; a.se *= b.se; a.sf *= b.sf; } + +inline __device__ void operator >>= (u64x &a, const u64 b) { a.s0 >>= b; a.s1 >>= b; a.s2 >>= b; a.s3 >>= b; a.s4 >>= b; a.s5 >>= b; a.s6 >>= b; a.s7 >>= b; a.s8 >>= b; a.s9 >>= b; a.sa >>= b; a.sb >>= b; a.sc >>= b; a.sd >>= b; a.se >>= b; a.sf >>= b; } +inline __device__ void operator >>= (u64x &a, const u64x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3; a.s4 >>= b.s4; a.s5 >>= b.s5; a.s6 >>= b.s6; a.s7 >>= b.s7; a.s8 >>= b.s8; a.s9 >>= b.s9; a.sa >>= b.sa; a.sb >>= b.sb; a.sc >>= b.sc; a.sd >>= b.sd; a.se >>= b.se; a.sf >>= b.sf; } + +inline __device__ void operator <<= (u64x &a, const u64 b) { a.s0 <<= b; a.s1 <<= b; a.s2 <<= b; a.s3 <<= b; a.s4 <<= b; a.s5 <<= b; a.s6 <<= b; a.s7 <<= b; a.s8 <<= b; a.s9 <<= b; a.sa <<= b; a.sb <<= b; a.sc <<= b; a.sd <<= b; a.se <<= b; a.sf <<= b; } +inline __device__ void operator <<= (u64x &a, const u64x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3; a.s4 <<= b.s4; a.s5 <<= b.s5; a.s6 <<= b.s6; a.s7 <<= b.s7; a.s8 <<= b.s8; a.s9 <<= b.s9; a.sa <<= b.sa; a.sb <<= b.sb; a.sc <<= b.sc; a.sd <<= b.sd; a.se <<= b.se; a.sf <<= b.sf; } + +inline __device__ u64x operator << (const u64x a, const u64 b) { return u64x ((a.s0 << b), (a.s1 << b) , (a.s2 << b), (a.s3 << b) , (a.s4 << b), (a.s5 << b) , (a.s6 << b), (a.s7 << b), (a.s8 << b), (a.s9 << b) , (a.sa << b), (a.sb << 
b) , (a.sc << b), (a.sd << b) , (a.se << b), (a.sf << b) ); } +inline __device__ u64x operator << (const u64x a, const u64x b) { return u64x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3), (a.s4 << b.s4), (a.s5 << b.s5), (a.s6 << b.s6), (a.s7 << b.s7), (a.s8 << b.s8), (a.s9 << b.s9), (a.sa << b.sa), (a.sb << b.sb), (a.sc << b.sc), (a.sd << b.sd), (a.se << b.se), (a.sf << b.sf)); } + +inline __device__ u64x operator >> (const u64x a, const u64 b) { return u64x ((a.s0 >> b), (a.s1 >> b) , (a.s2 >> b), (a.s3 >> b) , (a.s4 >> b), (a.s5 >> b) , (a.s6 >> b), (a.s7 >> b), (a.s8 >> b), (a.s9 >> b) , (a.sa >> b), (a.sb >> b) , (a.sc >> b), (a.sd >> b) , (a.se >> b), (a.sf >> b) ); } +inline __device__ u64x operator >> (const u64x a, const u64x b) { return u64x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3), (a.s4 >> b.s4), (a.s5 >> b.s5), (a.s6 >> b.s6), (a.s7 >> b.s7), (a.s8 >> b.s8), (a.s9 >> b.s9), (a.sa >> b.sa), (a.sb >> b.sb), (a.sc >> b.sc), (a.sd >> b.sd), (a.se >> b.se), (a.sf >> b.sf)); } + +inline __device__ u64x operator ^ (const u64x a, const u64 b) { return u64x ((a.s0 ^ b), (a.s1 ^ b) , (a.s2 ^ b), (a.s3 ^ b) , (a.s4 ^ b), (a.s5 ^ b) , (a.s6 ^ b), (a.s7 ^ b), (a.s8 ^ b), (a.s9 ^ b) , (a.sa ^ b), (a.sb ^ b) , (a.sc ^ b), (a.sd ^ b) , (a.se ^ b), (a.sf ^ b) ); } +inline __device__ u64x operator ^ (const u64x a, const u64x b) { return u64x ((a.s0 ^ b.s0), (a.s1 ^ b.s1), (a.s2 ^ b.s2), (a.s3 ^ b.s3), (a.s4 ^ b.s4), (a.s5 ^ b.s5), (a.s6 ^ b.s6), (a.s7 ^ b.s7), (a.s8 ^ b.s8), (a.s9 ^ b.s9), (a.sa ^ b.sa), (a.sb ^ b.sb), (a.sc ^ b.sc), (a.sd ^ b.sd), (a.se ^ b.se), (a.sf ^ b.sf)); } + +inline __device__ u64x operator | (const u64x a, const u64 b) { return u64x ((a.s0 | b), (a.s1 | b) , (a.s2 | b), (a.s3 | b) , (a.s4 | b), (a.s5 | b) , (a.s6 | b), (a.s7 | b), (a.s8 | b), (a.s9 | b) , (a.sa | b), (a.sb | b) , (a.sc | b), (a.sd | b) , (a.se | b), (a.sf | b) ); } +inline __device__ u64x operator | (const u64x a, const u64x b) { return 
u64x ((a.s0 | b.s0), (a.s1 | b.s1), (a.s2 | b.s2), (a.s3 | b.s3), (a.s4 | b.s4), (a.s5 | b.s5), (a.s6 | b.s6), (a.s7 | b.s7), (a.s8 | b.s8), (a.s9 | b.s9), (a.sa | b.sa), (a.sb | b.sb), (a.sc | b.sc), (a.sd | b.sd), (a.se | b.se), (a.sf | b.sf)); } + +inline __device__ u64x operator & (const u64x a, const u64 b) { return u64x ((a.s0 & b), (a.s1 & b) , (a.s2 & b), (a.s3 & b) , (a.s4 & b), (a.s5 & b) , (a.s6 & b), (a.s7 & b), (a.s8 & b), (a.s9 & b) , (a.sa & b), (a.sb & b) , (a.sc & b), (a.sd & b) , (a.se & b), (a.sf & b) ); } +inline __device__ u64x operator & (const u64x a, const u64x b) { return u64x ((a.s0 & b.s0), (a.s1 & b.s1), (a.s2 & b.s2), (a.s3 & b.s3), (a.s4 & b.s4), (a.s5 & b.s5), (a.s6 & b.s6), (a.s7 & b.s7), (a.s8 & b.s8), (a.s9 & b.s9), (a.sa & b.sa), (a.sb & b.sb), (a.sc & b.sc), (a.sd & b.sd), (a.se & b.se), (a.sf & b.sf)); } + +inline __device__ u64x operator + (const u64x a, const u64 b) { return u64x ((a.s0 + b), (a.s1 + b) , (a.s2 + b), (a.s3 + b) , (a.s4 + b), (a.s5 + b) , (a.s6 + b), (a.s7 + b), (a.s8 + b), (a.s9 + b) , (a.sa + b), (a.sb + b) , (a.sc + b), (a.sd + b) , (a.se + b), (a.sf + b) ); } +inline __device__ u64x operator + (const u64x a, const u64x b) { return u64x ((a.s0 + b.s0), (a.s1 + b.s1), (a.s2 + b.s2), (a.s3 + b.s3), (a.s4 + b.s4), (a.s5 + b.s5), (a.s6 + b.s6), (a.s7 + b.s7), (a.s8 + b.s8), (a.s9 + b.s9), (a.sa + b.sa), (a.sb + b.sb), (a.sc + b.sc), (a.sd + b.sd), (a.se + b.se), (a.sf + b.sf)); } + +inline __device__ u64x operator - (const u64x a, const u64 b) { return u64x ((a.s0 - b), (a.s1 - b) , (a.s2 - b), (a.s3 - b) , (a.s4 - b), (a.s5 - b) , (a.s6 - b), (a.s7 - b), (a.s8 - b), (a.s9 - b) , (a.sa - b), (a.sb - b) , (a.sc - b), (a.sd - b) , (a.se - b), (a.sf - b) ); } +inline __device__ u64x operator - (const u64x a, const u64x b) { return u64x ((a.s0 - b.s0), (a.s1 - b.s1), (a.s2 - b.s2), (a.s3 - b.s3), (a.s4 - b.s4), (a.s5 - b.s5), (a.s6 - b.s6), (a.s7 - b.s7), (a.s8 - b.s8), (a.s9 - b.s9), (a.sa - b.sa), (a.sb - b.sb), 
(a.sc - b.sc), (a.sd - b.sd), (a.se - b.se), (a.sf - b.sf)); } + +inline __device__ u64x operator * (const u64x a, const u64 b) { return u64x ((a.s0 * b), (a.s1 * b) , (a.s2 * b), (a.s3 * b) , (a.s4 * b), (a.s5 * b) , (a.s6 * b), (a.s7 * b), (a.s8 * b), (a.s9 * b) , (a.sa * b), (a.sb * b) , (a.sc * b), (a.sd * b) , (a.se * b), (a.sf * b) ); } +inline __device__ u64x operator * (const u64x a, const u64x b) { return u64x ((a.s0 * b.s0), (a.s1 * b.s1), (a.s2 * b.s2), (a.s3 * b.s3), (a.s4 * b.s4), (a.s5 * b.s5), (a.s6 * b.s6), (a.s7 * b.s7), (a.s8 * b.s8), (a.s9 * b.s9), (a.sa * b.sa), (a.sb * b.sb), (a.sc * b.sc), (a.sd * b.sd), (a.se * b.se), (a.sf * b.sf)); } + +inline __device__ u64x operator % (const u64x a, const u64 b) { return u64x ((a.s0 % b), (a.s1 % b) , (a.s2 % b), (a.s3 % b) , (a.s4 % b), (a.s5 % b) , (a.s6 % b), (a.s7 % b), (a.s8 % b), (a.s9 % b) , (a.sa % b), (a.sb % b) , (a.sc % b), (a.sd % b) , (a.se % b), (a.sf % b) ); } +inline __device__ u64x operator % (const u64x a, const u64x b) { return u64x ((a.s0 % b.s0), (a.s1 % b.s1), (a.s2 % b.s2), (a.s3 % b.s3), (a.s4 % b.s4), (a.s5 % b.s5), (a.s6 % b.s6), (a.s7 % b.s7), (a.s8 % b.s8), (a.s9 % b.s9), (a.sa % b.sa), (a.sb % b.sb), (a.sc % b.sc), (a.sd % b.sd), (a.se % b.se), (a.sf % b.sf)); } + +inline __device__ u64x operator ~ (const u64x a) { return u64x (~a.s0, ~a.s1, ~a.s2, ~a.s3, ~a.s4, ~a.s5, ~a.s6, ~a.s7, ~a.s8, ~a.s9, ~a.sa, ~a.sb, ~a.sc, ~a.sd, ~a.se, ~a.sf); } + +#endif + +typedef struct u8x u8x; +typedef struct u16x u16x; +typedef struct u32x u32x; +typedef struct u64x u64x; + +#define make_u8x u8x +#define make_u16x u16x +#define make_u32x u32x +#define make_u64x u64x +*/ + #else typedef VTYPE(uchar, VECT_SIZE) u8x; typedef VTYPE(ushort, VECT_SIZE) u16x; diff --git a/src/autotune.c b/src/autotune.c index cbb1ff499..f9aeddd83 100644 --- a/src/autotune.c +++ b/src/autotune.c @@ -487,9 +487,7 @@ HC_API_CALL void *thread_autotune (void *p) if (device_param->is_cuda == true) { - const int 
rc_cuCtxSetCurrent = hc_cuCtxSetCurrent (hashcat_ctx, device_param->cuda_context); - - if (rc_cuCtxSetCurrent == -1) return NULL; + if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1) return NULL; } if (device_param->is_hip == true) @@ -504,6 +502,11 @@ HC_API_CALL void *thread_autotune (void *p) // we should do something here, tell hashcat main that autotune failed to abort } + if (device_param->is_cuda == true) + { + if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return NULL; + } + if (device_param->is_hip == true) { if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL; diff --git a/src/backend.c b/src/backend.c index eafaec62e..6620d3d42 100644 --- a/src/backend.c +++ b/src/backend.c @@ -7979,7 +7979,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) continue; } - if (hc_cuCtxSetCurrent (hashcat_ctx, cuda_context) == -1) + if (hc_cuCtxPushCurrent (hashcat_ctx, cuda_context) == -1) { device_param->skipped = true; continue; @@ -7996,6 +7996,12 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) device_param->device_available_mem = (u64) free; + if (hc_cuCtxPopCurrent (hashcat_ctx, &cuda_context) == -1) + { + device_param->skipped = true; + continue; + } + if (hc_cuCtxDestroy (hashcat_ctx, cuda_context) == -1) { device_param->skipped = true; @@ -10178,6 +10184,12 @@ static u32 get_kernel_threads (const hc_device_param_t *device_param) gpu_prefered_thread_count = 32; } + kernel_threads_max = MIN (kernel_threads_max, gpu_prefered_thread_count); + } + else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + u32 gpu_prefered_thread_count = 64; + kernel_threads_max = MIN (kernel_threads_max, gpu_prefered_thread_count); } } @@ -10484,8 +10496,9 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p //hc_asprintf (&hiprtc_options[3], "compute_%d%d", device_param->sm_major, 
device_param->sm_minor); // TODO HIP + // no -offload-arch= aka --gpu-architecture because hiprtc gets native arch from hip_context - hiprtc_options[0] = ""; + hiprtc_options[0] = "--gpu-max-threads-per-block=64"; hiprtc_options[1] = ""; hiprtc_options[2] = ""; hiprtc_options[3] = ""; @@ -11242,6 +11255,10 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) { native_threads = 64; } + else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + native_threads = 64; + } else { native_threads = 32; @@ -11274,6 +11291,12 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->skipped = true; continue; } + + if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1) + { + device_param->skipped = true; + continue; + } } if (device_param->is_hip == true) @@ -15088,6 +15111,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) // context + if (device_param->is_cuda == true) + { + if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) + { + device_param->skipped = true; + continue; + } + } + if (device_param->is_hip == true) { if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) @@ -15251,6 +15283,11 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx) device_param->cuda_function_aux3 = NULL; device_param->cuda_function_aux4 = NULL; + device_param->cuda_event1 = NULL; + device_param->cuda_event2 = NULL; + + device_param->cuda_stream = NULL; + device_param->cuda_module = NULL; device_param->cuda_module_mp = NULL; device_param->cuda_module_amp = NULL; @@ -15372,6 +15409,11 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx) device_param->hip_function_aux3 = NULL; device_param->hip_function_aux4 = NULL; + device_param->hip_event1 = NULL; + device_param->hip_event2 = NULL; + + device_param->hip_stream = NULL; + device_param->hip_module = NULL; device_param->hip_module_mp = NULL; device_param->hip_module_amp = NULL; diff --git a/src/dispatch.c b/src/dispatch.c 
index d0cbfcfb6..337b25fc2 100644 --- a/src/dispatch.c +++ b/src/dispatch.c @@ -347,7 +347,7 @@ HC_API_CALL void *thread_calc_stdin (void *p) if (device_param->is_cuda == true) { - if (hc_cuCtxSetCurrent (hashcat_ctx, device_param->cuda_context) == -1) return NULL; + if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1) return NULL; } if (device_param->is_hip == true) @@ -362,6 +362,11 @@ HC_API_CALL void *thread_calc_stdin (void *p) status_ctx->devices_status = STATUS_ERROR; } + if (device_param->is_cuda == true) + { + if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return NULL; + } + if (device_param->is_hip == true) { if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL; @@ -1591,7 +1596,7 @@ HC_API_CALL void *thread_calc (void *p) if (device_param->is_cuda == true) { - if (hc_cuCtxSetCurrent (hashcat_ctx, device_param->cuda_context) == -1) return NULL; + if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1) return NULL; } if (device_param->is_hip == true) @@ -1606,6 +1611,11 @@ HC_API_CALL void *thread_calc (void *p) status_ctx->devices_status = STATUS_ERROR; } + if (device_param->is_cuda == true) + { + if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return NULL; + } + if (device_param->is_hip == true) { if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL; diff --git a/src/modules/module_17200.c b/src/modules/module_17200.c index fa510ba27..44798110d 100644 --- a/src/modules/module_17200.c +++ b/src/modules/module_17200.c @@ -170,6 +170,11 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // it leads to CL_KERNEL_WORK_GROUP_SIZE to return 0 and later we will divide with 0 // workaround would be to rewrite kernel to use global memory + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + return true; + } + if (device_param->opencl_device_vendor_id == 
VENDOR_ID_AMD) { return true; diff --git a/src/modules/module_17220.c b/src/modules/module_17220.c index 8f1beaf1a..9028040d9 100644 --- a/src/modules/module_17220.c +++ b/src/modules/module_17220.c @@ -170,6 +170,11 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // it leads to CL_KERNEL_WORK_GROUP_SIZE to return 0 and later we will divide with 0 // workaround would be to rewrite kernel to use global memory + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + return true; + } + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) { return true; diff --git a/src/modules/module_17225.c b/src/modules/module_17225.c index 3b3291d5f..75c376c9e 100644 --- a/src/modules/module_17225.c +++ b/src/modules/module_17225.c @@ -170,6 +170,11 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // it leads to CL_KERNEL_WORK_GROUP_SIZE to return 0 and later we will divide with 0 // workaround would be to rewrite kernel to use global memory + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + return true; + } + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) { return true; diff --git a/src/modules/module_19600.c b/src/modules/module_19600.c index 343b09637..b0ff3acb4 100644 --- a/src/modules/module_19600.c +++ b/src/modules/module_19600.c @@ -68,6 +68,11 @@ static const char *SIGNATURE_KRB5TGS = "$krb5tgs$17$"; bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param) { + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + return true; + } + // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { diff 
--git a/src/modules/module_19700.c b/src/modules/module_19700.c index 7d353e598..6bb194499 100644 --- a/src/modules/module_19700.c +++ b/src/modules/module_19700.c @@ -68,6 +68,11 @@ static const char *SIGNATURE_KRB5TGS = "$krb5tgs$18$"; bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param) { + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + return true; + } + // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { diff --git a/src/selftest.c b/src/selftest.c index ea08e4a43..4f8dc1092 100644 --- a/src/selftest.c +++ b/src/selftest.c @@ -875,7 +875,7 @@ HC_API_CALL void *thread_selftest (void *p) if (device_param->is_cuda == true) { - if (hc_cuCtxSetCurrent (hashcat_ctx, device_param->cuda_context) == -1) return NULL; + if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1) return NULL; } if (device_param->is_hip == true) @@ -901,6 +901,11 @@ HC_API_CALL void *thread_selftest (void *p) } } + if (device_param->is_cuda == true) + { + if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return NULL; + } + if (device_param->is_hip == true) { if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL; From 674ca7d88f919f8305af59dbfd918a436c0c9889 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Mon, 12 Jul 2021 11:27:05 +0200 Subject: [PATCH 07/22] Add GPU threads to kernel cache checksum because it has an influence on HIP offline compile options Add V_ALIGNBIT_B32 inline assembly wrapper because HIP does not provide amd_bitalign() --- OpenCL/inc_platform.cl | 39 ++++++++++++++++++++++++++++++++++++--- src/backend.c | 13 ++++++++----- 2 files 
changed, 44 insertions(+), 8 deletions(-) diff --git a/OpenCL/inc_platform.cl b/OpenCL/inc_platform.cl index 1a63e7c3a..806a403e4 100644 --- a/OpenCL/inc_platform.cl +++ b/OpenCL/inc_platform.cl @@ -246,22 +246,55 @@ DECLSPEC u32 rotr32_S (const u32 a, const int n) DECLSPEC u64x rotl64 (const u64x a, const int n) { - return ((a << n) | ((a >> (64 - n)))); + return rotr64 (a, 64 - n); +} + +DECLSPEC u32 amd_bitalign_S (const u32 a, const u32 b, const int n) +{ + u32 r = 0; + + __asm__ ("V_ALIGNBIT_B32 %0, %1, %2, %3;" : "=v"(r): "v"(a), "v"(b), "v"(n)); + + return r; } DECLSPEC u64x rotr64 (const u64x a, const int n) { + #if VECT_SIZE == 1 + return rotr64_S (a, n); + #else return ((a >> n) | ((a << (64 - n)))); + #endif } DECLSPEC u64 rotl64_S (const u64 a, const int n) { - return ((a << n) | ((a >> (64 - n)))); + return rotr64_S (a, 64 - n); } DECLSPEC u64 rotr64_S (const u64 a, const int n) { - return ((a >> n) | ((a << (64 - n)))); + vconv64_t in; + + in.v64 = a; + + const u32 a0 = in.v32.a; + const u32 a1 = in.v32.b; + + vconv64_t out; + + if (n < 32) + { + out.v32.a = amd_bitalign_S (a1, a0, n); + out.v32.b = amd_bitalign_S (a0, a1, n); + } + else + { + out.v32.a = amd_bitalign_S (a0, a1, n - 32); + out.v32.b = amd_bitalign_S (a1, a0, n - 32); + } + + return out.v64; } #define FIXED_THREAD_COUNT(n) __launch_bounds__((n), 0) diff --git a/src/backend.c b/src/backend.c index 6620d3d42..02cbbc3dd 100644 --- a/src/backend.c +++ b/src/backend.c @@ -10498,8 +10498,9 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p // TODO HIP // no -offload-arch= aka --gpu-architecture because hiprtc gets native arch from hip_context - hiprtc_options[0] = "--gpu-max-threads-per-block=64"; - hiprtc_options[1] = ""; + hc_asprintf (&hiprtc_options[0], "--gpu-max-threads-per-block=%u", device_param->kernel_threads); + + hiprtc_options[1] = "-O3"; hiprtc_options[2] = ""; hiprtc_options[3] = ""; @@ -11588,7 +11589,7 @@ int backend_session_begin 
(hashcat_ctx_t *hashcat_ctx) char device_name_chksum_amp_mp[HCBUFSIZ_TINY] = { 0 }; - const size_t dnclen_amp_mp = snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s", + const size_t dnclen_amp_mp = snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s-%d", backend_ctx->comptime, backend_ctx->cuda_driver_version, backend_ctx->hip_driver_version, @@ -11596,7 +11597,8 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->opencl_platform_vendor_id, device_param->device_name, device_param->opencl_device_version, - device_param->opencl_driver_version); + device_param->opencl_driver_version, + device_param->kernel_threads); md5_ctx_t md5_ctx; @@ -11887,7 +11889,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) const u32 extra_value = (user_options->attack_mode == ATTACK_MODE_ASSOCIATION) ? ATTACK_MODE_ASSOCIATION : ATTACK_MODE_NONE; - const size_t dnclen = snprintf (device_name_chksum, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s-%d-%u-%u-%s", + const size_t dnclen = snprintf (device_name_chksum, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s-%d-%u-%d-%u-%s", backend_ctx->comptime, backend_ctx->cuda_driver_version, backend_ctx->hip_driver_version, @@ -11897,6 +11899,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->opencl_device_version, device_param->opencl_driver_version, device_param->vector_width, + device_param->kernel_threads, hashconfig->kern_type, extra_value, build_options_module_buf); From 219bed457f0b376593da064ab38026404d63f769 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Mon, 12 Jul 2021 14:02:43 +0200 Subject: [PATCH 08/22] Fix use of --gpu-max-threads-per-block --- src/backend.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/backend.c b/src/backend.c index 02cbbc3dd..defd20aeb 100644 --- a/src/backend.c +++ b/src/backend.c @@ -10498,9 +10498,8 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t 
*device_p // TODO HIP // no -offload-arch= aka --gpu-architecture because hiprtc gets native arch from hip_context - hc_asprintf (&hiprtc_options[0], "--gpu-max-threads-per-block=%u", device_param->kernel_threads); - - hiprtc_options[1] = "-O3"; + hiprtc_options[0] = "--gpu-max-threads-per-block=64"; + hiprtc_options[1] = ""; hiprtc_options[2] = ""; hiprtc_options[3] = ""; @@ -11589,7 +11588,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) char device_name_chksum_amp_mp[HCBUFSIZ_TINY] = { 0 }; - const size_t dnclen_amp_mp = snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s-%d", + const size_t dnclen_amp_mp = snprintf (device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s", backend_ctx->comptime, backend_ctx->cuda_driver_version, backend_ctx->hip_driver_version, @@ -11597,8 +11596,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->opencl_platform_vendor_id, device_param->device_name, device_param->opencl_device_version, - device_param->opencl_driver_version, - device_param->kernel_threads); + device_param->opencl_driver_version); md5_ctx_t md5_ctx; @@ -11889,7 +11887,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) const u32 extra_value = (user_options->attack_mode == ATTACK_MODE_ASSOCIATION) ? 
ATTACK_MODE_ASSOCIATION : ATTACK_MODE_NONE; - const size_t dnclen = snprintf (device_name_chksum, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s-%d-%u-%d-%u-%s", + const size_t dnclen = snprintf (device_name_chksum, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s-%d-%u-%u-%s", backend_ctx->comptime, backend_ctx->cuda_driver_version, backend_ctx->hip_driver_version, @@ -11899,7 +11897,6 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->opencl_device_version, device_param->opencl_driver_version, device_param->vector_width, - device_param->kernel_threads, hashconfig->kern_type, extra_value, build_options_module_buf); @@ -14617,11 +14614,20 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) } else { + device_param->kernel_threads_min = MIN (device_param->kernel_threads_min, 64); device_param->kernel_threads_max = MIN (device_param->kernel_threads_max, 64); } } } + // HIP kernels are compiled with --gpu-max-threads-per-block=64, so cap the thread range at 64 + + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + device_param->kernel_threads_min = MIN (device_param->kernel_threads_min, 64); + device_param->kernel_threads_max = MIN (device_param->kernel_threads_max, 64); + } + /** * now everything that depends on threads and accel, basically dynamic workload */ From 7faf6859d6e15707d44f377d0ac4a2cdfad3fec1 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Tue, 13 Jul 2021 20:45:01 +0200 Subject: [PATCH 09/22] Backport hand-optimized compiler settings in modules from ROCM to HIP Backport DECLSPEC settings from ROCM to HIP --- OpenCL/inc_vendor.h | 2 +- src/modules/module_01460.c | 6 ++++++ src/modules/module_01500.c | 8 ++++++++ src/modules/module_01700.c | 6 ++++++ src/modules/module_01720.c | 6 ++++++ src/modules/module_01722.c | 6 ++++++ src/modules/module_01800.c | 6 ++++++ src/modules/module_03000.c | 6 ++++++ src/modules/module_05200.c | 6 ++++++ src/modules/module_06211.c | 21 +-------------------- src/modules/module_06212.c | 21 +-------------------- src/modules/module_06213.c | 21 +-------------------- 
src/modules/module_06800.c | 6 ++++++ src/modules/module_07400.c | 6 ++++++ src/modules/module_07401.c | 6 ++++++ src/modules/module_07500.c | 11 +++++++++++ src/modules/module_07900.c | 6 ++++++ src/modules/module_08700.c | 15 ++++++++++++++- src/modules/module_09600.c | 6 ++++++ src/modules/module_09700.c | 4 ++++ src/modules/module_09710.c | 4 ++++ src/modules/module_09720.c | 37 ++++++++++++++++++++++++++++++++++++- src/modules/module_09800.c | 4 ++++ src/modules/module_09810.c | 4 ++++ src/modules/module_09820.c | 4 ++++ src/modules/module_10400.c | 4 ++++ src/modules/module_10410.c | 4 ++++ src/modules/module_10420.c | 37 ++++++++++++++++++++++++++++++++++++- src/modules/module_10500.c | 4 ++++ src/modules/module_10800.c | 6 ++++++ src/modules/module_10810.c | 6 ++++++ src/modules/module_10820.c | 6 ++++++ src/modules/module_10830.c | 6 ++++++ src/modules/module_10840.c | 6 ++++++ src/modules/module_10870.c | 6 ++++++ src/modules/module_10900.c | 6 ++++++ src/modules/module_11300.c | 6 ++++++ src/modules/module_11600.c | 6 ++++++ src/modules/module_12200.c | 6 ++++++ src/modules/module_12800.c | 6 ++++++ src/modules/module_12900.c | 6 ++++++ src/modules/module_13000.c | 6 ++++++ src/modules/module_13100.c | 11 +++++++++++ src/modules/module_14000.c | 6 ++++++ src/modules/module_14500.c | 6 ++++++ src/modules/module_15000.c | 6 ++++++ src/modules/module_15600.c | 6 ++++++ src/modules/module_16200.c | 6 ++++++ src/modules/module_16300.c | 6 ++++++ src/modules/module_16900.c | 6 ++++++ src/modules/module_18200.c | 11 +++++++++++ src/modules/module_18300.c | 6 ++++++ src/modules/module_18800.c | 6 ++++++ src/modules/module_20600.c | 6 ++++++ src/modules/module_21000.c | 6 ++++++ src/modules/module_22100.c | 6 ++++++ src/modules/module_22200.c | 6 ++++++ src/modules/module_22400.c | 6 ++++++ src/modules/module_23400.c | 6 ++++++ src/modules/module_24200.c | 6 ++++++ src/modules/module_25300.c | 6 ++++++ src/modules/module_25400.c | 11 +++++++++++ 
src/modules/module_25500.c | 6 ++++++ src/modules/module_25900.c | 6 ++++++ src/modules/module_26200.c | 6 ++++++ src/modules/module_26600.c | 6 ++++++ 66 files changed, 450 insertions(+), 64 deletions(-) diff --git a/OpenCL/inc_vendor.h b/OpenCL/inc_vendor.h index dc6a41d4a..d98a85053 100644 --- a/OpenCL/inc_vendor.h +++ b/OpenCL/inc_vendor.h @@ -131,7 +131,7 @@ #if defined IS_AMD && defined IS_GPU #define DECLSPEC inline static #elif defined IS_HIP -#define DECLSPEC __device__ +#define DECLSPEC inline static __device__ #else #define DECLSPEC #endif diff --git a/src/modules/module_01460.c b/src/modules/module_01460.c index 6cb814ed2..f2952aa36 100644 --- a/src/modules/module_01460.c +++ b/src/modules/module_01460.c @@ -59,6 +59,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY hc_asprintf (&jit_build_options, "-D _unroll"); } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_01500.c b/src/modules/module_01500.c index 274d9541a..ea01dab96 100644 --- a/src/modules/module_01500.c +++ b/src/modules/module_01500.c @@ -179,6 +179,14 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY hc_asprintf (&jit_build_options, "-DDESCRYPT_SALT=%u -D _unroll", hashes->salts_buf[0].salt_buf[0] & 0xfff); } } + // ROCM + else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + if ((user_options->attack_mode == ATTACK_MODE_BF) && (hashes->salts_cnt == 1) && (user_options->slow_candidates == false)) + { + hc_asprintf (&jit_build_options, "-DDESCRYPT_SALT=%u -D _unroll", hashes->salts_buf[0].salt_buf[0] & 0xfff); + } + } else { if ((user_options->attack_mode == ATTACK_MODE_BF) && (hashes->salts_cnt == 1) && (user_options->slow_candidates == false)) diff --git 
a/src/modules/module_01700.c b/src/modules/module_01700.c index 9a7f1d34f..04f2762c1 100644 --- a/src/modules/module_01700.c +++ b/src/modules/module_01700.c @@ -58,6 +58,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY return jit_build_options; } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_01720.c b/src/modules/module_01720.c index 6833f4405..3fdc77653 100644 --- a/src/modules/module_01720.c +++ b/src/modules/module_01720.c @@ -58,6 +58,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY return jit_build_options; } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_01722.c b/src/modules/module_01722.c index 4585e2dbb..3264c5f46 100644 --- a/src/modules/module_01722.c +++ b/src/modules/module_01722.c @@ -59,6 +59,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY return jit_build_options; } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_01800.c b/src/modules/module_01800.c index 1cc1781c2..aefab6e3b 100644 --- a/src/modules/module_01800.c +++ b/src/modules/module_01800.c @@ -438,6 +438,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY return jit_build_options; } + // HIP + if (device_param->opencl_device_vendor_id == 
VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-fno-unroll-loops"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_03000.c b/src/modules/module_03000.c index a3373e09b..c9b616ab5 100644 --- a/src/modules/module_03000.c +++ b/src/modules/module_03000.c @@ -81,6 +81,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY hc_asprintf (&jit_build_options, "-D _unroll"); } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_05200.c b/src/modules/module_05200.c index 470411378..6fb3f08d8 100644 --- a/src/modules/module_05200.c +++ b/src/modules/module_05200.c @@ -81,6 +81,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY hc_asprintf (&jit_build_options, "-D _unroll"); } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_06211.c b/src/modules/module_06211.c index b7aa35874..5cb417d26 100644 --- a/src/modules/module_06211.c +++ b/src/modules/module_06211.c @@ -83,25 +83,6 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE return false; } -char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param) -{ - char *jit_build_options = NULL; - - // Extra treatment for Apple systems - if 
(device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) - { - return jit_build_options; - } - - // ROCM - if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) - { - hc_asprintf (&jit_build_options, "-D _unroll"); - } - - return jit_build_options; -} - bool module_potfile_disable (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { const bool potfile_disable = true; @@ -303,7 +284,7 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_hook23 = MODULE_DEFAULT; module_ctx->module_hook_salt_size = MODULE_DEFAULT; module_ctx->module_hook_size = MODULE_DEFAULT; - module_ctx->module_jit_build_options = module_jit_build_options; + module_ctx->module_jit_build_options = MODULE_DEFAULT; module_ctx->module_jit_cache_disable = MODULE_DEFAULT; module_ctx->module_kernel_accel_max = MODULE_DEFAULT; module_ctx->module_kernel_accel_min = MODULE_DEFAULT; diff --git a/src/modules/module_06212.c b/src/modules/module_06212.c index 9ac3487c7..ceb18f192 100644 --- a/src/modules/module_06212.c +++ b/src/modules/module_06212.c @@ -83,25 +83,6 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE return false; } -char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param) -{ - char *jit_build_options = NULL; - - // Extra treatment for Apple systems - if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) - { - return jit_build_options; - } - - // ROCM - if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) - { - hc_asprintf (&jit_build_options, "-D _unroll"); - } - - return jit_build_options; -} - bool module_potfile_disable 
(MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { const bool potfile_disable = true; @@ -303,7 +284,7 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_hook23 = MODULE_DEFAULT; module_ctx->module_hook_salt_size = MODULE_DEFAULT; module_ctx->module_hook_size = MODULE_DEFAULT; - module_ctx->module_jit_build_options = module_jit_build_options; + module_ctx->module_jit_build_options = MODULE_DEFAULT; module_ctx->module_jit_cache_disable = MODULE_DEFAULT; module_ctx->module_kernel_accel_max = MODULE_DEFAULT; module_ctx->module_kernel_accel_min = MODULE_DEFAULT; diff --git a/src/modules/module_06213.c b/src/modules/module_06213.c index 04430ec31..20323fc62 100644 --- a/src/modules/module_06213.c +++ b/src/modules/module_06213.c @@ -83,25 +83,6 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE return false; } -char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param) -{ - char *jit_build_options = NULL; - - // Extra treatment for Apple systems - if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) - { - return jit_build_options; - } - - // ROCM - if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) - { - hc_asprintf (&jit_build_options, "-D _unroll"); - } - - return jit_build_options; -} - bool module_potfile_disable (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { const bool potfile_disable = true; @@ -301,7 +282,7 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_hook23 = MODULE_DEFAULT; module_ctx->module_hook_salt_size = 
MODULE_DEFAULT; module_ctx->module_hook_size = MODULE_DEFAULT; - module_ctx->module_jit_build_options = module_jit_build_options; + module_ctx->module_jit_build_options = MODULE_DEFAULT; module_ctx->module_jit_cache_disable = MODULE_DEFAULT; module_ctx->module_kernel_accel_max = MODULE_DEFAULT; module_ctx->module_kernel_accel_min = MODULE_DEFAULT; diff --git a/src/modules/module_06800.c b/src/modules/module_06800.c index 5a79ca8e7..0f25fa29d 100644 --- a/src/modules/module_06800.c +++ b/src/modules/module_06800.c @@ -78,6 +78,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY hc_asprintf (&jit_build_options, "-D _unroll"); } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_07400.c b/src/modules/module_07400.c index f8ebca33d..3be47f898 100644 --- a/src/modules/module_07400.c +++ b/src/modules/module_07400.c @@ -244,6 +244,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY return jit_build_options; } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_07401.c b/src/modules/module_07401.c index b96318d8e..ba71bf179 100644 --- a/src/modules/module_07401.c +++ b/src/modules/module_07401.c @@ -245,6 +245,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY return jit_build_options; } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm 
== true)) { diff --git a/src/modules/module_07500.c b/src/modules/module_07500.c index 931cc6b47..1681fb4a8 100644 --- a/src/modules/module_07500.c +++ b/src/modules/module_07500.c @@ -80,6 +80,17 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY native_threads = 64; } } + else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + if (device_param->device_local_mem_size < 49152) + { + native_threads = 32; + } + else + { + native_threads = 64; + } + } else { native_threads = 32; diff --git a/src/modules/module_07900.c b/src/modules/module_07900.c index 67944eb5a..a51efecd9 100644 --- a/src/modules/module_07900.c +++ b/src/modules/module_07900.c @@ -79,6 +79,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY hc_asprintf (&jit_build_options, "-D _unroll"); } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_08700.c b/src/modules/module_08700.c index 6f75c6e02..be902d527 100644 --- a/src/modules/module_08700.c +++ b/src/modules/module_08700.c @@ -60,6 +60,19 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con return pw_max; } +char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param) +{ + char *jit_build_options = NULL; + + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-fno-unroll-loops"); + } + + return jit_build_options; +} + int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED 
salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len) { u32 *digest = (u32 *) digest_buf; @@ -179,7 +192,7 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_hook23 = MODULE_DEFAULT; module_ctx->module_hook_salt_size = MODULE_DEFAULT; module_ctx->module_hook_size = MODULE_DEFAULT; - module_ctx->module_jit_build_options = MODULE_DEFAULT; + module_ctx->module_jit_build_options = module_jit_build_options; module_ctx->module_jit_cache_disable = MODULE_DEFAULT; module_ctx->module_kernel_accel_max = MODULE_DEFAULT; module_ctx->module_kernel_accel_min = MODULE_DEFAULT; diff --git a/src/modules/module_09600.c b/src/modules/module_09600.c index abfe9fdee..ecf6dc6fb 100644 --- a/src/modules/module_09600.c +++ b/src/modules/module_09600.c @@ -84,6 +84,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY hc_asprintf (&jit_build_options, "-D _unroll"); } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_09700.c b/src/modules/module_09700.c index f5e2f1138..cad911186 100644 --- a/src/modules/module_09700.c +++ b/src/modules/module_09700.c @@ -77,6 +77,10 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY { native_threads = 64; } + else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + native_threads = 64; + } else { native_threads = 32; diff --git a/src/modules/module_09710.c b/src/modules/module_09710.c index 250bc3863..033f77ea0 100644 --- a/src/modules/module_09710.c +++ b/src/modules/module_09710.c @@ -77,6 +77,10 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY { native_threads = 64; } + else if 
(device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + native_threads = 64; + } else { native_threads = 32; diff --git a/src/modules/module_09720.c b/src/modules/module_09720.c index 7db204dc6..04e99201f 100644 --- a/src/modules/module_09720.c +++ b/src/modules/module_09720.c @@ -58,6 +58,41 @@ static const char *SIGNATURE_OLDOFFICE = "$oldoffice$"; static const char *SIGNATURE_OLDOFFICE0 = "$oldoffice$0"; static const char *SIGNATURE_OLDOFFICE1 = "$oldoffice$1"; +char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param) +{ + char *jit_build_options = NULL; + + u32 native_threads = 0; + + if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) + { + native_threads = 1; + } + else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + { + if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) + { + native_threads = 8; + } + else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) + { + native_threads = 64; + } + else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + native_threads = 64; + } + else + { + native_threads = 32; + } + } + + hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D _unroll", native_threads); + + return jit_build_options; +} + u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { const u64 esalt_size = (const u64) sizeof (oldoffice01_t); @@ -273,7 +308,7 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_hook23 = MODULE_DEFAULT; module_ctx->module_hook_salt_size = MODULE_DEFAULT; module_ctx->module_hook_size = MODULE_DEFAULT; - module_ctx->module_jit_build_options = MODULE_DEFAULT; + 
module_ctx->module_jit_build_options = module_jit_build_options; module_ctx->module_jit_cache_disable = MODULE_DEFAULT; module_ctx->module_kernel_accel_max = MODULE_DEFAULT; module_ctx->module_kernel_accel_min = MODULE_DEFAULT; diff --git a/src/modules/module_09800.c b/src/modules/module_09800.c index 4508fcd5b..2eb7fab05 100644 --- a/src/modules/module_09800.c +++ b/src/modules/module_09800.c @@ -79,6 +79,10 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY { native_threads = 64; } + else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + native_threads = 64; + } else { native_threads = 32; diff --git a/src/modules/module_09810.c b/src/modules/module_09810.c index e1a434cf3..2a1074b2c 100644 --- a/src/modules/module_09810.c +++ b/src/modules/module_09810.c @@ -78,6 +78,10 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY { native_threads = 64; } + else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + native_threads = 64; + } else { native_threads = 32; diff --git a/src/modules/module_09820.c b/src/modules/module_09820.c index f3f7ddee2..ea3dfe22b 100644 --- a/src/modules/module_09820.c +++ b/src/modules/module_09820.c @@ -80,6 +80,10 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY { native_threads = 64; } + else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + native_threads = 64; + } else { native_threads = 32; diff --git a/src/modules/module_10400.c b/src/modules/module_10400.c index c782d9c04..77416f5ce 100644 --- a/src/modules/module_10400.c +++ b/src/modules/module_10400.c @@ -84,6 +84,10 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY { native_threads = 64; } + else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + native_threads = 64; + } else { native_threads = 32; diff --git a/src/modules/module_10410.c 
b/src/modules/module_10410.c index df1d40d2d..b2c98363f 100644 --- a/src/modules/module_10410.c +++ b/src/modules/module_10410.c @@ -85,6 +85,10 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY { native_threads = 64; } + else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + native_threads = 64; + } else { native_threads = 32; diff --git a/src/modules/module_10420.c b/src/modules/module_10420.c index 6f182a436..23e537bf5 100644 --- a/src/modules/module_10420.c +++ b/src/modules/module_10420.c @@ -64,6 +64,41 @@ typedef struct pdf static const char *SIGNATURE_PDF = "$pdf$"; +char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param) +{ + char *jit_build_options = NULL; + + u32 native_threads = 0; + + if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) + { + native_threads = 1; + } + else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) + { + if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) + { + native_threads = 8; + } + else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) + { + native_threads = 64; + } + else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + native_threads = 64; + } + else + { + native_threads = 32; + } + } + + hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D _unroll", native_threads); + + return jit_build_options; +} + u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { const u64 esalt_size = (const u64) sizeof (pdf_t); @@ -369,7 +404,7 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_hook23 = MODULE_DEFAULT; module_ctx->module_hook_salt_size = 
MODULE_DEFAULT; module_ctx->module_hook_size = MODULE_DEFAULT; - module_ctx->module_jit_build_options = MODULE_DEFAULT; + module_ctx->module_jit_build_options = module_jit_build_options; module_ctx->module_jit_cache_disable = MODULE_DEFAULT; module_ctx->module_kernel_accel_max = MODULE_DEFAULT; module_ctx->module_kernel_accel_min = MODULE_DEFAULT; diff --git a/src/modules/module_10500.c b/src/modules/module_10500.c index fbb1af6a3..80a8478ef 100644 --- a/src/modules/module_10500.c +++ b/src/modules/module_10500.c @@ -108,6 +108,10 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY { native_threads = 64; } + else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + native_threads = 64; + } else { native_threads = 32; diff --git a/src/modules/module_10800.c b/src/modules/module_10800.c index 65cff2b7e..1765bddac 100644 --- a/src/modules/module_10800.c +++ b/src/modules/module_10800.c @@ -72,6 +72,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY return jit_build_options; } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_10810.c b/src/modules/module_10810.c index 797c2cf17..10d1443f4 100644 --- a/src/modules/module_10810.c +++ b/src/modules/module_10810.c @@ -72,6 +72,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY return jit_build_options; } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_10820.c b/src/modules/module_10820.c index d9b76cc5c..82987fe39 100644 --- 
a/src/modules/module_10820.c +++ b/src/modules/module_10820.c @@ -72,6 +72,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY return jit_build_options; } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_10830.c b/src/modules/module_10830.c index 91a70b707..f431762f8 100644 --- a/src/modules/module_10830.c +++ b/src/modules/module_10830.c @@ -73,6 +73,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY return jit_build_options; } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_10840.c b/src/modules/module_10840.c index 4cbb7db28..f60d3ea13 100644 --- a/src/modules/module_10840.c +++ b/src/modules/module_10840.c @@ -73,6 +73,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY return jit_build_options; } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_10870.c b/src/modules/module_10870.c index 52a70afac..047c67242 100644 --- a/src/modules/module_10870.c +++ b/src/modules/module_10870.c @@ -73,6 +73,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY return jit_build_options; } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if 
((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_10900.c b/src/modules/module_10900.c index b0634ec4e..efde01301 100644 --- a/src/modules/module_10900.c +++ b/src/modules/module_10900.c @@ -77,6 +77,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY hc_asprintf (&jit_build_options, "-D _unroll"); } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_11300.c b/src/modules/module_11300.c index 9cb3ae217..981a0b471 100644 --- a/src/modules/module_11300.c +++ b/src/modules/module_11300.c @@ -81,6 +81,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY hc_asprintf (&jit_build_options, "-D _unroll"); } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_11600.c b/src/modules/module_11600.c index 7694b71f3..25fe732a3 100644 --- a/src/modules/module_11600.c +++ b/src/modules/module_11600.c @@ -111,6 +111,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY return jit_build_options; } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_12200.c b/src/modules/module_12200.c index 15b6c0c85..3f6b57821 100644 --- a/src/modules/module_12200.c +++ b/src/modules/module_12200.c @@ -81,6 +81,12 @@ char *module_jit_build_options 
(MAYBE_UNUSED const hashconfig_t *hashconfig, MAY hc_asprintf (&jit_build_options, "-D _unroll"); } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_12800.c b/src/modules/module_12800.c index 22658f2c6..cd2099cdc 100644 --- a/src/modules/module_12800.c +++ b/src/modules/module_12800.c @@ -69,6 +69,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY hc_asprintf (&jit_build_options, "-D _unroll"); } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_12900.c b/src/modules/module_12900.c index cabff4977..4c5a9892b 100644 --- a/src/modules/module_12900.c +++ b/src/modules/module_12900.c @@ -69,6 +69,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY hc_asprintf (&jit_build_options, "-D _unroll"); } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_13000.c b/src/modules/module_13000.c index 2d441994a..ab389431d 100644 --- a/src/modules/module_13000.c +++ b/src/modules/module_13000.c @@ -75,6 +75,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY hc_asprintf (&jit_build_options, "-D _unroll"); } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == 
VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_13100.c b/src/modules/module_13100.c index bab0dbf26..fac5cb24c 100644 --- a/src/modules/module_13100.c +++ b/src/modules/module_13100.c @@ -79,6 +79,17 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY native_threads = 64; } } + else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + if (device_param->device_local_mem_size < 49152) + { + native_threads = 32; + } + else + { + native_threads = 64; + } + } else { native_threads = 32; diff --git a/src/modules/module_14000.c b/src/modules/module_14000.c index c6854d6c9..013888bcc 100644 --- a/src/modules/module_14000.c +++ b/src/modules/module_14000.c @@ -76,6 +76,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY hc_asprintf (&jit_build_options, "-D _unroll"); } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_14500.c b/src/modules/module_14500.c index 286117f86..ba4edab17 100644 --- a/src/modules/module_14500.c +++ b/src/modules/module_14500.c @@ -113,6 +113,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY hc_asprintf (&jit_build_options, "-D _unroll"); } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_15000.c b/src/modules/module_15000.c index 481f88cb3..9fae0e769 100644 --- a/src/modules/module_15000.c +++ b/src/modules/module_15000.c @@ -69,6 +69,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY return 
jit_build_options; } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_15600.c b/src/modules/module_15600.c index 168609688..c7acdb8d3 100644 --- a/src/modules/module_15600.c +++ b/src/modules/module_15600.c @@ -78,6 +78,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY hc_asprintf (&jit_build_options, "-D _unroll"); } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_16200.c b/src/modules/module_16200.c index 7f1b34959..19cbbac8d 100644 --- a/src/modules/module_16200.c +++ b/src/modules/module_16200.c @@ -78,6 +78,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY hc_asprintf (&jit_build_options, "-D _unroll"); } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_16300.c b/src/modules/module_16300.c index 8ce5e668d..33997b1ed 100644 --- a/src/modules/module_16300.c +++ b/src/modules/module_16300.c @@ -79,6 +79,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY hc_asprintf (&jit_build_options, "-D _unroll"); } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git 
a/src/modules/module_16900.c b/src/modules/module_16900.c index 5cfe5aeb7..93915b592 100644 --- a/src/modules/module_16900.c +++ b/src/modules/module_16900.c @@ -79,6 +79,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY hc_asprintf (&jit_build_options, "-D _unroll"); } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_18200.c b/src/modules/module_18200.c index b95ddab6f..e6596306b 100644 --- a/src/modules/module_18200.c +++ b/src/modules/module_18200.c @@ -79,6 +79,17 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY native_threads = 64; } } + else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + if (device_param->device_local_mem_size < 49152) + { + native_threads = 32; + } + else + { + native_threads = 64; + } + } else { native_threads = 32; diff --git a/src/modules/module_18300.c b/src/modules/module_18300.c index 592081296..b58ef35f5 100644 --- a/src/modules/module_18300.c +++ b/src/modules/module_18300.c @@ -78,6 +78,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY hc_asprintf (&jit_build_options, "-D _unroll"); } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_18800.c b/src/modules/module_18800.c index 5bb6132a7..6847edde2 100644 --- a/src/modules/module_18800.c +++ b/src/modules/module_18800.c @@ -80,6 +80,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY hc_asprintf (&jit_build_options, "-D _unroll"); } + // HIP + if 
(device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_20600.c b/src/modules/module_20600.c index a7debbc58..e270fde50 100644 --- a/src/modules/module_20600.c +++ b/src/modules/module_20600.c @@ -71,6 +71,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY hc_asprintf (&jit_build_options, "-D _unroll"); } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_21000.c b/src/modules/module_21000.c index e482be5e1..7df98beb4 100644 --- a/src/modules/module_21000.c +++ b/src/modules/module_21000.c @@ -72,6 +72,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY return jit_build_options; } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_22100.c b/src/modules/module_22100.c index cd79bc7f8..47b72d7df 100644 --- a/src/modules/module_22100.c +++ b/src/modules/module_22100.c @@ -86,6 +86,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY hc_asprintf (&jit_build_options, "-D _unroll"); } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_22200.c b/src/modules/module_22200.c index 
1f034ef50..5ea525fbe 100644 --- a/src/modules/module_22200.c +++ b/src/modules/module_22200.c @@ -72,6 +72,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY return jit_build_options; } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_22400.c b/src/modules/module_22400.c index 567dec821..a3ab81101 100644 --- a/src/modules/module_22400.c +++ b/src/modules/module_22400.c @@ -96,6 +96,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY hc_asprintf (&jit_build_options, "-D _unroll"); } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_23400.c b/src/modules/module_23400.c index 4ba636bfa..5921ca9db 100644 --- a/src/modules/module_23400.c +++ b/src/modules/module_23400.c @@ -69,6 +69,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY hc_asprintf (&jit_build_options, "-D _unroll"); } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_24200.c b/src/modules/module_24200.c index b1da70c9a..bd93b36e0 100644 --- a/src/modules/module_24200.c +++ b/src/modules/module_24200.c @@ -79,6 +79,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY hc_asprintf (&jit_build_options, "-D _unroll"); } + // HIP + if (device_param->opencl_device_vendor_id == 
VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_25300.c b/src/modules/module_25300.c index 6bed1017c..e21d55881 100644 --- a/src/modules/module_25300.c +++ b/src/modules/module_25300.c @@ -72,6 +72,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY hc_asprintf (&jit_build_options, "-D _unroll"); } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_25400.c b/src/modules/module_25400.c index 5dbbe8dc3..341837786 100644 --- a/src/modules/module_25400.c +++ b/src/modules/module_25400.c @@ -118,6 +118,17 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY native_threads = 64; } } + else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + if (device_param->device_local_mem_size < 49152) + { + native_threads = 32; + } + else + { + native_threads = 64; + } + } else { native_threads = 32; diff --git a/src/modules/module_25500.c b/src/modules/module_25500.c index 10edd7203..e6853e951 100644 --- a/src/modules/module_25500.c +++ b/src/modules/module_25500.c @@ -74,6 +74,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY return jit_build_options; } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_25900.c b/src/modules/module_25900.c index efe7f39f2..c44a1642b 100644 --- a/src/modules/module_25900.c +++ b/src/modules/module_25900.c 
@@ -81,6 +81,12 @@ char* module_jit_build_options(MAYBE_UNUSED const hashconfig_t *hashconfig, MAYB hc_asprintf(&jit_build_options, "-D _unroll"); } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_26200.c b/src/modules/module_26200.c index 63989796e..8b082f2d5 100644 --- a/src/modules/module_26200.c +++ b/src/modules/module_26200.c @@ -74,6 +74,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY return jit_build_options; } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { diff --git a/src/modules/module_26600.c b/src/modules/module_26600.c index 61ccac983..c9e04958c 100644 --- a/src/modules/module_26600.c +++ b/src/modules/module_26600.c @@ -74,6 +74,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY return jit_build_options; } + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + // ROCM if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) { From f3bd9369719cf941d580ee9ef6d68c0e7bfc4b5f Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Wed, 14 Jul 2021 08:23:39 +0200 Subject: [PATCH 10/22] Add hardware monitor mapping for HIP devices --- src/backend.c | 10 +++++----- src/hwmon.c | 52 +++++++++++++++++++++++++-------------------------- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/src/backend.c b/src/backend.c index defd20aeb..d63ee3a1d 100644 --- a/src/backend.c +++ b/src/backend.c @@ -8278,13 +8278,13 @@ int 
backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) } #endif - if ((device_param->opencl_platform_vendor_id == VENDOR_ID_NV) && (device_param->opencl_device_vendor_id == VENDOR_ID_NV)) + if ((device_param->opencl_platform_vendor_id == VENDOR_ID_AMD_USE_HIP) && (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)) { - need_nvml = true; + need_adl = true; - #if defined (_WIN) || defined (__CYGWIN__) - need_nvapi = true; - #endif + #if defined (__linux__) + need_sysfs_amdgpu = true; + #endif } // CPU burning loop damper diff --git a/src/hwmon.c b/src/hwmon.c index bc95f9515..374056d07 100644 --- a/src/hwmon.c +++ b/src/hwmon.c @@ -95,11 +95,11 @@ int hm_get_threshold_slowdown_with_devices_idx (hashcat_ctx_t *hashcat_ctx, cons } } - if (backend_ctx->devices_param[backend_device_idx].is_opencl == true) + if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || (backend_ctx->devices_param[backend_device_idx].is_hip == true)) { if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) { - if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) + if ((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)) { if (hwmon_ctx->hm_adl) { @@ -176,11 +176,11 @@ int hm_get_threshold_shutdown_with_devices_idx (hashcat_ctx_t *hashcat_ctx, cons } } - if (backend_ctx->devices_param[backend_device_idx].is_opencl == true) + if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || (backend_ctx->devices_param[backend_device_idx].is_hip == true)) { if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) { - if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) + if ((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == 
VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)) { if (hwmon_ctx->hm_adl) { @@ -245,7 +245,7 @@ int hm_get_temperature_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int b } } - if (backend_ctx->devices_param[backend_device_idx].is_opencl == true) + if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || (backend_ctx->devices_param[backend_device_idx].is_hip == true)) { if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_CPU) { @@ -313,7 +313,7 @@ int hm_get_temperature_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int b } #endif - if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) + if ((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)) { if (hwmon_ctx->hm_adl) { @@ -401,11 +401,11 @@ int hm_get_fanpolicy_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int bac return 1; } - if (backend_ctx->devices_param[backend_device_idx].is_opencl == true) + if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || (backend_ctx->devices_param[backend_device_idx].is_hip == true)) { if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) { - if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) + if ((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)) { if (hwmon_ctx->hm_adl) { @@ -499,11 +499,11 @@ int hm_get_fanspeed_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int back } } - if (backend_ctx->devices_param[backend_device_idx].is_opencl == true) + if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || 
(backend_ctx->devices_param[backend_device_idx].is_hip == true)) { if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) { - if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) + if ((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)) { if (hwmon_ctx->hm_adl) { @@ -609,11 +609,11 @@ int hm_get_buslanes_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int back } } - if (backend_ctx->devices_param[backend_device_idx].is_opencl == true) + if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || (backend_ctx->devices_param[backend_device_idx].is_hip == true)) { if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) { - if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) + if ((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)) { if (hwmon_ctx->hm_adl) { @@ -696,11 +696,11 @@ int hm_get_utilization_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int b } } - if (backend_ctx->devices_param[backend_device_idx].is_opencl == true) + if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || (backend_ctx->devices_param[backend_device_idx].is_hip == true)) { if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) { - if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) + if ((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)) { if (hwmon_ctx->hm_adl) { @@ -800,11 +800,11 @@ int 
hm_get_memoryspeed_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int b } } - if (backend_ctx->devices_param[backend_device_idx].is_opencl == true) + if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || (backend_ctx->devices_param[backend_device_idx].is_hip == true)) { if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) { - if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) + if ((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)) { if (hwmon_ctx->hm_adl) { @@ -887,11 +887,11 @@ int hm_get_corespeed_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int bac } } - if (backend_ctx->devices_param[backend_device_idx].is_opencl == true) + if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || (backend_ctx->devices_param[backend_device_idx].is_hip == true)) { if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) { - if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) + if ((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)) { if (hwmon_ctx->hm_adl) { @@ -1003,11 +1003,11 @@ int hm_get_throttle_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int back } } - if (backend_ctx->devices_param[backend_device_idx].is_opencl == true) + if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || (backend_ctx->devices_param[backend_device_idx].is_hip == true)) { if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU) { - if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) + if 
((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)) { } @@ -1382,11 +1382,11 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx) // nothing to do } - if (device_param->is_opencl == true) + if ((device_param->is_opencl == true) || (device_param->is_hip == true)) { if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) continue; - if (device_param->opencl_device_vendor_id != VENDOR_ID_AMD) continue; + if ((device_param->opencl_device_vendor_id != VENDOR_ID_AMD) && (device_param->opencl_device_vendor_id != VENDOR_ID_AMD_USE_HIP)) continue; for (int i = 0; i < tmp_in; i++) { @@ -1438,7 +1438,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx) // nothing to do } - if (device_param->is_opencl == true) + if ((device_param->is_opencl == true) || (device_param->is_hip == true)) { const u32 device_id = device_param->device_id; @@ -1485,7 +1485,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx) // nothing to do } - if (device_param->is_opencl == true) + if ((device_param->is_opencl == true) || (device_param->is_hip == true)) { const u32 device_id = device_param->device_id; @@ -1594,7 +1594,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx) } } - if (device_param->is_opencl == true) + if ((device_param->is_opencl == true) || (device_param->is_hip == true)) { if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) { @@ -1655,7 +1655,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx) } #endif - if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) + if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) || (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)) { hwmon_ctx->hm_device[backend_devices_idx].adl = hm_adapters_adl[device_id].adl; hwmon_ctx->hm_device[backend_devices_idx].sysfs_amdgpu = hm_adapters_sysfs_amdgpu[device_id].sysfs_amdgpu; From 9c134833a6769f23caeef2280545c8037351e539 Mon Sep 
17 00:00:00 2001 From: Jens Steube Date: Wed, 14 Jul 2021 08:26:12 +0200 Subject: [PATCH 11/22] Update module_unstable_warning() for -m 19600 and -m 19700 --- src/modules/module_19600.c | 5 ----- src/modules/module_19700.c | 5 ----- 2 files changed, 10 deletions(-) diff --git a/src/modules/module_19600.c b/src/modules/module_19600.c index b0ff3acb4..343b09637 100644 --- a/src/modules/module_19600.c +++ b/src/modules/module_19600.c @@ -68,11 +68,6 @@ static const char *SIGNATURE_KRB5TGS = "$krb5tgs$17$"; bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param) { - if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) - { - return true; - } - // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { diff --git a/src/modules/module_19700.c b/src/modules/module_19700.c index 6bb194499..7d353e598 100644 --- a/src/modules/module_19700.c +++ b/src/modules/module_19700.c @@ -68,11 +68,6 @@ static const char *SIGNATURE_KRB5TGS = "$krb5tgs$18$"; bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param) { - if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) - { - return true; - } - // AMD Radeon Pro W5700X Compute Engine; 1.2 (Apr 22 2021 21:54:44); 11.3.1; 20E241 if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)) { From 11295e467990e083f7eb765d48719a56e0cd5e18 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Wed, 14 Jul 2021 17:01:46 +0200 Subject: [PATCH 12/22] Fix 
missing OPTI_TYPE_USES_BITS_64 in several modules --- src/modules/module_09600.c | 1 + src/modules/module_10100.c | 1 + src/modules/module_10700.c | 1 + src/modules/module_11700.c | 3 ++- src/modules/module_11750.c | 3 ++- src/modules/module_11760.c | 3 ++- src/modules/module_11800.c | 3 ++- src/modules/module_11850.c | 3 ++- src/modules/module_11860.c | 3 ++- src/modules/module_19200.c | 3 ++- 10 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/modules/module_09600.c b/src/modules/module_09600.c index abfe9fdee..4d5cb0afd 100644 --- a/src/modules/module_09600.c +++ b/src/modules/module_09600.c @@ -20,6 +20,7 @@ static const u32 HASH_CATEGORY = HASH_CATEGORY_DOCUMENTS; static const char *HASH_NAME = "MS Office 2013"; static const u64 KERN_TYPE = 9600; static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE + | OPTI_TYPE_USES_BITS_64 | OPTI_TYPE_SLOW_HASH_SIMD_LOOP; static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE | OPTS_TYPE_DEEP_COMP_KERNEL; diff --git a/src/modules/module_10100.c b/src/modules/module_10100.c index 092db5cf3..c5cafc8c9 100644 --- a/src/modules/module_10100.c +++ b/src/modules/module_10100.c @@ -21,6 +21,7 @@ static const char *HASH_NAME = "SipHash"; static const u64 KERN_TYPE = 10100; static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_NOT_ITERATED + | OPTI_TYPE_USES_BITS_64 | OPTI_TYPE_RAW_HASH; static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE; static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; diff --git a/src/modules/module_10700.c b/src/modules/module_10700.c index 6b6317934..5c801b3c9 100644 --- a/src/modules/module_10700.c +++ b/src/modules/module_10700.c @@ -20,6 +20,7 @@ static const u32 HASH_CATEGORY = HASH_CATEGORY_DOCUMENTS; static const char *HASH_NAME = "PDF 1.7 Level 8 (Acrobat 10 - 11)"; static const u64 KERN_TYPE = 10700; static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE + | OPTI_TYPE_USES_BITS_64 | OPTI_TYPE_REGISTER_LIMIT; static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE | OPTS_TYPE_HASH_COPY; diff 
--git a/src/modules/module_11700.c b/src/modules/module_11700.c index eb872087c..ebf903a55 100644 --- a/src/modules/module_11700.c +++ b/src/modules/module_11700.c @@ -19,7 +19,8 @@ static const u32 DGST_SIZE = DGST_SIZE_4_8; static const u32 HASH_CATEGORY = HASH_CATEGORY_RAW_HASH; static const char *HASH_NAME = "GOST R 34.11-2012 (Streebog) 256-bit, big-endian"; static const u64 KERN_TYPE = 11700; -static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE; +static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE + | OPTI_TYPE_USES_BITS_64; static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE | OPTS_TYPE_PT_ADD01; static const u32 SALT_TYPE = SALT_TYPE_NONE; diff --git a/src/modules/module_11750.c b/src/modules/module_11750.c index 8ef88b63b..f2302db35 100644 --- a/src/modules/module_11750.c +++ b/src/modules/module_11750.c @@ -19,7 +19,8 @@ static const u32 DGST_SIZE = DGST_SIZE_4_8; static const u32 HASH_CATEGORY = HASH_CATEGORY_RAW_HASH_AUTHENTICATED; static const char *HASH_NAME = "HMAC-Streebog-256 (key = $pass), big-endian"; static const u64 KERN_TYPE = 11750; -static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE; +static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE + | OPTI_TYPE_USES_BITS_64; static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE | OPTS_TYPE_PT_ADD01; static const u32 SALT_TYPE = SALT_TYPE_GENERIC; diff --git a/src/modules/module_11760.c b/src/modules/module_11760.c index d81290f24..5c574e469 100644 --- a/src/modules/module_11760.c +++ b/src/modules/module_11760.c @@ -19,7 +19,8 @@ static const u32 DGST_SIZE = DGST_SIZE_4_8; static const u32 HASH_CATEGORY = HASH_CATEGORY_RAW_HASH_AUTHENTICATED; static const char *HASH_NAME = "HMAC-Streebog-256 (key = $salt), big-endian"; static const u64 KERN_TYPE = 11760; -static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE; +static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE + | OPTI_TYPE_USES_BITS_64; static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE | OPTS_TYPE_PT_ADD01; static const u32 SALT_TYPE = SALT_TYPE_GENERIC; diff 
--git a/src/modules/module_11800.c b/src/modules/module_11800.c index 89bd377d9..3c83bc0c0 100644 --- a/src/modules/module_11800.c +++ b/src/modules/module_11800.c @@ -19,7 +19,8 @@ static const u32 DGST_SIZE = DGST_SIZE_4_16; static const u32 HASH_CATEGORY = HASH_CATEGORY_RAW_HASH; static const char *HASH_NAME = "GOST R 34.11-2012 (Streebog) 512-bit, big-endian"; static const u64 KERN_TYPE = 11800; -static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE; +static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE + | OPTI_TYPE_USES_BITS_64; static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE | OPTS_TYPE_PT_ADD01; static const u32 SALT_TYPE = SALT_TYPE_NONE; diff --git a/src/modules/module_11850.c b/src/modules/module_11850.c index 18a993aa2..6bf1853b4 100644 --- a/src/modules/module_11850.c +++ b/src/modules/module_11850.c @@ -19,7 +19,8 @@ static const u32 DGST_SIZE = DGST_SIZE_4_16; static const u32 HASH_CATEGORY = HASH_CATEGORY_RAW_HASH_AUTHENTICATED; static const char *HASH_NAME = "HMAC-Streebog-512 (key = $pass), big-endian"; static const u64 KERN_TYPE = 11850; -static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE; +static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE + | OPTI_TYPE_USES_BITS_64; static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE | OPTS_TYPE_PT_ADD01; static const u32 SALT_TYPE = SALT_TYPE_GENERIC; diff --git a/src/modules/module_11860.c b/src/modules/module_11860.c index ad29aa7a5..67a5ff618 100644 --- a/src/modules/module_11860.c +++ b/src/modules/module_11860.c @@ -19,7 +19,8 @@ static const u32 DGST_SIZE = DGST_SIZE_4_16; static const u32 HASH_CATEGORY = HASH_CATEGORY_RAW_HASH_AUTHENTICATED; static const char *HASH_NAME = "HMAC-Streebog-512 (key = $salt), big-endian"; static const u64 KERN_TYPE = 11860; -static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE; +static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE + | OPTI_TYPE_USES_BITS_64; static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE | OPTS_TYPE_PT_ADD01; static const u32 SALT_TYPE = SALT_TYPE_GENERIC; diff 
--git a/src/modules/module_19200.c b/src/modules/module_19200.c index 165b66ec9..3dd7b46f6 100644 --- a/src/modules/module_19200.c +++ b/src/modules/module_19200.c @@ -21,7 +21,8 @@ static const u32 DGST_SIZE = DGST_SIZE_8_8; static const u32 HASH_CATEGORY = HASH_CATEGORY_OS; static const char *HASH_NAME = "QNX /etc/shadow (SHA512)"; static const u64 KERN_TYPE = 19200; -static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE; +static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE + | OPTI_TYPE_USES_BITS_64; static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE; static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; static const char *ST_PASS = "hashcat"; From cf512faa53f3641c79e702f74e24f29ff26db092 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Wed, 14 Jul 2021 17:06:20 +0200 Subject: [PATCH 13/22] Update large switch() cases in inc_common.cl and some inline assembly common functions for devices managed with HIP backend --- OpenCL/inc_common.cl | 174 +++++++++++++++++----------------- OpenCL/inc_ecc_secp256k1.cl | 4 +- OpenCL/inc_rp_optimized.cl | 18 +--- OpenCL/inc_vendor.h | 2 +- OpenCL/m00500-optimized.cl | 6 +- OpenCL/m01500_a3-pure.cl | 4 +- OpenCL/m01600-optimized.cl | 6 +- OpenCL/m03000_a3-pure.cl | 4 +- OpenCL/m05800-optimized.cl | 2 +- OpenCL/m06300-optimized.cl | 6 +- OpenCL/m07400-optimized.cl | 14 +-- OpenCL/m07700_a0-optimized.cl | 4 +- OpenCL/m07700_a1-optimized.cl | 5 - OpenCL/m07700_a3-optimized.cl | 5 - OpenCL/m07701_a0-optimized.cl | 5 - OpenCL/m07701_a1-optimized.cl | 5 - OpenCL/m07701_a3-optimized.cl | 5 - OpenCL/m10700-optimized.cl | 4 +- OpenCL/m11600-pure.cl | 2 +- OpenCL/m12500-pure.cl | 2 +- OpenCL/m13800_a0-optimized.cl | 2 +- OpenCL/m13800_a1-optimized.cl | 2 +- OpenCL/m13800_a3-optimized.cl | 2 +- OpenCL/m14000_a3-pure.cl | 4 +- OpenCL/m23700-pure.cl | 2 +- OpenCL/m23800-pure.cl | 2 +- src/backend.c | 22 ++--- 27 files changed, 139 insertions(+), 174 deletions(-) diff --git a/OpenCL/inc_common.cl b/OpenCL/inc_common.cl index 26df19a2b..82b50b7c8 100644 
--- a/OpenCL/inc_common.cl +++ b/OpenCL/inc_common.cl @@ -528,7 +528,7 @@ DECLSPEC u32x unpack_v8a_from_v32 (const u32x v32) asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.sf) : "r"(v32.sf)); #endif - //#elif defined IS_AMD && HAS_VBFE == 1 + //#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1 //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 0, 8;" : "=v"(r) : "v"(v32)); #else r = (v32 >> 0) & 0xff; @@ -575,7 +575,7 @@ DECLSPEC u32x unpack_v8b_from_v32 (const u32x v32) asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.sf) : "r"(v32.sf)); #endif - //#elif defined IS_AMD && HAS_VBFE == 1 + //#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1 //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 8, 8;" : "=v"(r) : "v"(v32)); #else r = (v32 >> 8) & 0xff; @@ -622,7 +622,7 @@ DECLSPEC u32x unpack_v8c_from_v32 (const u32x v32) asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.sf) : "r"(v32.sf)); #endif - //#elif defined IS_AMD && HAS_VBFE == 1 + //#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1 //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 16, 8;" : "=v"(r) : "v"(v32)); #else r = (v32 >> 16) & 0xff; @@ -669,7 +669,7 @@ DECLSPEC u32x unpack_v8d_from_v32 (const u32x v32) asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.sf) : "r"(v32.sf)); #endif - //#elif defined IS_AMD && HAS_VBFE == 1 + //#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1 //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 24, 8;" : "=v"(r) : "v"(v32)); #else r = (v32 >> 24) & 0xff; @@ -684,7 +684,7 @@ DECLSPEC u32 unpack_v8a_from_v32_S (const u32 v32) #if defined IS_NV && HAS_BFE == 1 asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r) : "r"(v32)); - //#elif defined IS_AMD && HAS_VBFE == 1 + //#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1 //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 0, 8;" : "=v"(r) : "v"(v32)); #else r = (v32 >> 0) & 0xff; @@ -699,7 +699,7 @@ DECLSPEC u32 unpack_v8b_from_v32_S (const u32 v32) #if defined IS_NV && HAS_BFE == 1 asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r) : 
"r"(v32)); - //#elif defined IS_AMD && HAS_VBFE == 1 + //#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1 //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 8, 8;" : "=v"(r) : "v"(v32)); #else r = (v32 >> 8) & 0xff; @@ -714,7 +714,7 @@ DECLSPEC u32 unpack_v8c_from_v32_S (const u32 v32) #if defined IS_NV && HAS_BFE == 1 asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r) : "r"(v32)); - //#elif defined IS_AMD && HAS_VBFE == 1 + //#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1 //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 16, 8;" : "=v"(r) : "v"(v32)); #else r = (v32 >> 16) & 0xff; @@ -729,7 +729,7 @@ DECLSPEC u32 unpack_v8d_from_v32_S (const u32 v32) #if defined IS_NV && HAS_BFE == 1 asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r) : "r"(v32)); - //#elif defined IS_AMD && HAS_VBFE == 1 + //#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1 //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 24, 8;" : "=v"(r) : "v"(v32)); #else r = (v32 >> 24) & 0xff; @@ -939,9 +939,9 @@ DECLSPEC u64x hc_rotl64 (const u64x a, const int n) { #if defined _CPU_OPENCL_EMU_H return rotl64 (a, n); - #elif defined IS_CUDA || defined IS_HIP + #elif defined IS_CUDA return rotl64 (a, n); - #elif defined IS_AMD + #elif (defined IS_AMD || defined IS_HIP) return rotl64 (a, n); #else #ifdef USE_ROTATE @@ -956,9 +956,9 @@ DECLSPEC u64x hc_rotr64 (const u64x a, const int n) { #if defined _CPU_OPENCL_EMU_H return rotr64 (a, n); - #elif defined IS_CUDA || defined IS_HIP + #elif defined IS_CUDA return rotr64 (a, n); - #elif defined IS_AMD + #elif (defined IS_AMD || defined IS_HIP) return rotr64 (a, n); #else #ifdef USE_ROTATE @@ -973,9 +973,9 @@ DECLSPEC u64 hc_rotl64_S (const u64 a, const int n) { #if defined _CPU_OPENCL_EMU_H return rotl64 (a, n); - #elif defined IS_CUDA || defined IS_HIP + #elif defined IS_CUDA return rotl64_S (a, n); - #elif defined IS_AMD + #elif (defined IS_AMD || defined IS_HIP) return rotl64_S (a, n); #else #ifdef USE_ROTATE @@ -990,9 +990,9 @@ DECLSPEC u64 hc_rotr64_S 
(const u64 a, const int n) { #if defined _CPU_OPENCL_EMU_H return rotr64 (a, n); - #elif defined IS_CUDA || defined IS_HIP + #elif defined IS_CUDA return rotr64_S (a, n); - #elif defined IS_AMD + #elif (defined IS_AMD || defined IS_HIP) return rotr64_S (a, n); #else #ifdef USE_ROTATE @@ -1012,7 +1012,7 @@ DECLSPEC u32x hc_swap32 (const u32x v) #ifdef _CPU_OPENCL_EMU_H r = byte_swap_32 (v); #else - #if defined IS_AMD && HAS_VPERM == 1 + #if (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 const u32 m = 0x00010203; @@ -1109,7 +1109,7 @@ DECLSPEC u32 hc_swap32_S (const u32 v) #ifdef _CPU_OPENCL_EMU_H r = byte_swap_32 (v); #else - #if defined IS_AMD && HAS_VPERM == 1 + #if (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 __asm__ __volatile__ ("V_PERM_B32 %0, 0, %1, %2;" : "=v"(r) : "v"(v), "v"(0x00010203)); #elif defined IS_NV && HAS_PRMT == 1 asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r) : "r"(v)); @@ -1135,7 +1135,7 @@ DECLSPEC u64x hc_swap64 (const u64x v) #ifdef _CPU_OPENCL_EMU_H r = byte_swap_64 (v); #else - #if defined IS_AMD && HAS_VPERM == 1 + #if (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 const u32 m = 0x00010203; @@ -1354,7 +1354,7 @@ DECLSPEC u64 hc_swap64_S (const u64 v) #ifdef _CPU_OPENCL_EMU_H r = byte_swap_64 (v); #else - #if defined IS_AMD && HAS_VPERM == 1 + #if (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 const u32 m = 0x00010203; const u32 v0 = h32_from_64_S (v); @@ -1399,7 +1399,7 @@ DECLSPEC u64 hc_swap64_S (const u64 v) return r; } -#ifdef IS_AMD +#if (defined IS_AMD || defined IS_HIP) DECLSPEC u32x hc_bfe (const u32x a, const u32x b, const u32x c) { @@ -2767,7 +2767,7 @@ DECLSPEC void make_utf16be (const u32x *in, u32x *out1, u32x *out2) out1[1] = hc_byte_perm (in[0], 0, 0x3727); out1[0] = hc_byte_perm (in[0], 0, 0x1707); - #elif defined IS_AMD && HAS_VPERM + #elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM out2[3] = hc_byte_perm (in[3], 0, 0x03070207); out2[2] = hc_byte_perm (in[3], 0, 0x01070007); 
@@ -2805,7 +2805,7 @@ DECLSPEC void make_utf16beN (const u32x *in, u32x *out1, u32x *out2) out1[1] = hc_byte_perm (in[0], 0, 0x1707); out1[0] = hc_byte_perm (in[0], 0, 0x3727); - #elif defined IS_AMD && HAS_VPERM + #elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM out2[3] = hc_byte_perm (in[3], 0, 0x01070007); out2[2] = hc_byte_perm (in[3], 0, 0x03070207); @@ -2843,7 +2843,7 @@ DECLSPEC void make_utf16le (const u32x *in, u32x *out1, u32x *out2) out1[1] = hc_byte_perm (in[0], 0, 0x7372); out1[0] = hc_byte_perm (in[0], 0, 0x7170); - #elif defined IS_AMD && HAS_VPERM + #elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM out2[3] = hc_byte_perm (in[3], 0, 0x07030702); out2[2] = hc_byte_perm (in[3], 0, 0x07010700); @@ -2881,7 +2881,7 @@ DECLSPEC void make_utf16leN (const u32x *in, u32x *out1, u32x *out2) out1[1] = hc_byte_perm (in[0], 0, 0x7170); out1[0] = hc_byte_perm (in[0], 0, 0x7372); - #elif defined IS_AMD && HAS_VPERM + #elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM out2[3] = hc_byte_perm (in[3], 0, 0x07010700); out2[2] = hc_byte_perm (in[3], 0, 0x07030702); @@ -2915,7 +2915,7 @@ DECLSPEC void undo_utf16be (const u32x *in1, const u32x *in2, u32x *out) out[2] = hc_byte_perm (in2[0], in2[1], 0x4602); out[3] = hc_byte_perm (in2[2], in2[3], 0x4602); - #elif defined IS_AMD && HAS_VPERM + #elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM out[0] = hc_byte_perm (in1[0], in1[1], 0x04060002); out[1] = hc_byte_perm (in1[2], in1[3], 0x04060002); @@ -2945,7 +2945,7 @@ DECLSPEC void undo_utf16le (const u32x *in1, const u32x *in2, u32x *out) out[2] = hc_byte_perm (in2[0], in2[1], 0x6420); out[3] = hc_byte_perm (in2[2], in2[3], 0x6420); - #elif defined IS_AMD && HAS_VPERM + #elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM out[0] = hc_byte_perm (in1[0], in1[1], 0x06040200); out[1] = hc_byte_perm (in1[2], in1[3], 0x06040200); @@ -3069,7 +3069,7 @@ DECLSPEC void switch_buffer_by_offset_le (u32x *w0, u32x *w1, u32x *w2, 
u32x *w3 { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -3394,7 +3394,7 @@ DECLSPEC void switch_buffer_by_offset_le (u32x *w0, u32x *w1, u32x *w2, u32x *w3 } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; @@ -3404,7 +3404,7 @@ DECLSPEC void switch_buffer_by_offset_le (u32x *w0, u32x *w1, u32x *w2, u32x *w3 const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); #endif @@ -3737,7 +3737,7 @@ DECLSPEC void switch_buffer_by_offset_carry_le (u32x *w0, u32x *w1, u32x *w2, u3 { const int offset_switch = offset / 4; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -4665,7 +4665,7 @@ DECLSPEC void switch_buffer_by_offset_be (u32x *w0, u32x *w1, u32x *w2, u32x *w3 { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -4990,13 +4990,13 @@ DECLSPEC void switch_buffer_by_offset_be (u32x *w0, u32x *w1, u32x *w2, u32x *w3 } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); #endif @@ -5329,7 +5329,7 @@ DECLSPEC void 
switch_buffer_by_offset_carry_be (u32x *w0, u32x *w1, u32x *w2, u3 { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -5790,13 +5790,13 @@ DECLSPEC void switch_buffer_by_offset_carry_be (u32x *w0, u32x *w1, u32x *w2, u3 } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); #endif @@ -6265,7 +6265,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le (u32x *w0, u32x *w1, u32x *w2, u32x { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -7422,7 +7422,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le (u32x *w0, u32x *w1, u32x *w2, u32x } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; @@ -7432,7 +7432,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le (u32x *w0, u32x *w1, u32x *w2, u32x const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); #endif @@ -8005,7 +8005,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le (u32x *w0, u32x *w1, u32x *w2 { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined 
IS_GENERIC switch (offset_switch) { case 0: @@ -9690,7 +9690,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le (u32x *w0, u32x *w1, u32x *w2 } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; @@ -9700,7 +9700,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le (u32x *w0, u32x *w1, u32x *w2 const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); #endif @@ -11393,7 +11393,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_be (u32x *w0, u32x *w1, u32x *w2, u32x { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -12550,13 +12550,13 @@ DECLSPEC void switch_buffer_by_offset_8x4_be (u32x *w0, u32x *w1, u32x *w2, u32x } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); #endif @@ -13721,7 +13721,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_be (u32x *w0, u32x *w1, u32x *w2 { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -15406,13 +15406,13 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_be (u32x *w0, u32x *w1, u32x *w2 } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || 
defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); #endif @@ -17105,7 +17105,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le (u32x *w, const u32 offset) { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -21462,7 +21462,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le (u32x *w, const u32 offset) } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; @@ -21472,7 +21472,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le (u32x *w, const u32 offset) const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); #endif @@ -25837,7 +25837,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_be (u32x *w, const u32 offset) { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -30194,13 +30194,13 @@ DECLSPEC void switch_buffer_by_offset_1x64_be (u32x *w, const u32 offset) } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset 
& 3) * 8)); #endif @@ -36533,7 +36533,7 @@ DECLSPEC void make_utf16be_S (const u32 *in, u32 *out1, u32 *out2) out1[1] = hc_byte_perm_S (in[0], 0, 0x3727); out1[0] = hc_byte_perm_S (in[0], 0, 0x1707); - #elif defined IS_AMD && HAS_VPERM + #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM out2[3] = hc_byte_perm_S (in[3], 0, 0x03070207); out2[2] = hc_byte_perm_S (in[3], 0, 0x01070007); @@ -36571,7 +36571,7 @@ DECLSPEC void make_utf16le_S (const u32 *in, u32 *out1, u32 *out2) out1[1] = hc_byte_perm_S (in[0], 0, 0x7372); out1[0] = hc_byte_perm_S (in[0], 0, 0x7170); - #elif defined IS_AMD && HAS_VPERM + #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM out2[3] = hc_byte_perm_S (in[3], 0, 0x07030702); out2[2] = hc_byte_perm_S (in[3], 0, 0x07010700); @@ -36605,7 +36605,7 @@ DECLSPEC void undo_utf16be_S (const u32 *in1, const u32 *in2, u32 *out) out[2] = hc_byte_perm_S (in2[0], in2[1], 0x4602); out[3] = hc_byte_perm_S (in2[2], in2[3], 0x4602); - #elif defined IS_AMD && HAS_VPERM + #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM out[0] = hc_byte_perm_S (in1[0], in1[1], 0x04060002); out[1] = hc_byte_perm_S (in1[2], in1[3], 0x04060002); @@ -36635,7 +36635,7 @@ DECLSPEC void undo_utf16le_S (const u32 *in1, const u32 *in2, u32 *out) out[2] = hc_byte_perm_S (in2[0], in2[1], 0x6420); out[3] = hc_byte_perm_S (in2[2], in2[3], 0x6420); - #elif defined IS_AMD && HAS_VPERM + #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM out[0] = hc_byte_perm_S (in1[0], in1[1], 0x06040200); out[1] = hc_byte_perm_S (in1[2], in1[3], 0x06040200); @@ -36660,7 +36660,7 @@ DECLSPEC void switch_buffer_by_offset_le_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3, { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -36985,7 +36985,7 @@ DECLSPEC void switch_buffer_by_offset_le_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3, } #endif - #if 
(defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; @@ -36995,7 +36995,7 @@ DECLSPEC void switch_buffer_by_offset_le_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); #endif @@ -37328,7 +37328,7 @@ DECLSPEC void switch_buffer_by_offset_carry_le_S (u32 *w0, u32 *w1, u32 *w2, u32 { const int offset_switch = offset / 4; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -38256,7 +38256,7 @@ DECLSPEC void switch_buffer_by_offset_be_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3, { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -38581,13 +38581,13 @@ DECLSPEC void switch_buffer_by_offset_be_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3, } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); #endif @@ -38920,7 +38920,7 @@ DECLSPEC void switch_buffer_by_offset_carry_be_S (u32 *w0, u32 *w1, u32 *w2, u32 { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -39381,13 +39381,13 @@ DECLSPEC void switch_buffer_by_offset_carry_be_S 
(u32 *w0, u32 *w1, u32 *w2, u32 } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); #endif @@ -39856,7 +39856,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le_S (u32 *w0, u32 *w1, u32 *w2, u32 * { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -41013,7 +41013,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le_S (u32 *w0, u32 *w1, u32 *w2, u32 * } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; @@ -41023,7 +41023,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le_S (u32 *w0, u32 *w1, u32 *w2, u32 * const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); #endif @@ -41596,7 +41596,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le_S (u32 *w0, u32 *w1, u32 *w2, { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -43281,7 +43281,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le_S (u32 *w0, u32 *w1, u32 *w2, } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; @@ -43291,7 +43291,7 @@ 
DECLSPEC void switch_buffer_by_offset_8x4_carry_le_S (u32 *w0, u32 *w1, u32 *w2, const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); #endif @@ -44984,7 +44984,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_be_S (u32 *w0, u32 *w1, u32 *w2, u32 * { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -46141,13 +46141,13 @@ DECLSPEC void switch_buffer_by_offset_8x4_be_S (u32 *w0, u32 *w1, u32 *w2, u32 * } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); #endif @@ -47312,7 +47312,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_be_S (u32 *w0, u32 *w1, u32 *w2, { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -48997,13 +48997,13 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_be_S (u32 *w0, u32 *w1, u32 *w2, } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); #endif @@ -50696,7 +50696,7 @@ DECLSPEC void 
switch_buffer_by_offset_1x64_le_S (u32 *w, const u32 offset) { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -55053,7 +55053,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le_S (u32 *w, const u32 offset) } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; @@ -55063,7 +55063,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le_S (u32 *w, const u32 offset) const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); #endif @@ -59428,7 +59428,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_be_S (u32 *w, const u32 offset) { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -63785,13 +63785,13 @@ DECLSPEC void switch_buffer_by_offset_1x64_be_S (u32 *w, const u32 offset) } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); #endif diff --git a/OpenCL/inc_ecc_secp256k1.cl b/OpenCL/inc_ecc_secp256k1.cl index e21f528d6..b3a70df78 100644 --- a/OpenCL/inc_ecc_secp256k1.cl +++ b/OpenCL/inc_ecc_secp256k1.cl @@ -124,7 +124,7 @@ DECLSPEC u32 sub (u32 *r, const u32 *a, const u32 *b) : "r"(a[0]), "r"(a[1]), "r"(a[2]), "r"(a[3]), "r"(a[4]), 
"r"(a[5]), "r"(a[6]), "r"(a[7]), "r"(b[0]), "r"(b[1]), "r"(b[2]), "r"(b[3]), "r"(b[4]), "r"(b[5]), "r"(b[6]), "r"(b[7]) ); - #elif defined IS_AMD && HAS_VSUB == 1 && HAS_VSUBB == 1 + #elif (defined IS_AMD || defined IS_HIP) && HAS_VSUB == 1 && HAS_VSUBB == 1 __asm__ __volatile__ ( "V_SUB_U32 %0, %9, %17;" @@ -176,7 +176,7 @@ DECLSPEC u32 add (u32 *r, const u32 *a, const u32 *b) : "r"(a[0]), "r"(a[1]), "r"(a[2]), "r"(a[3]), "r"(a[4]), "r"(a[5]), "r"(a[6]), "r"(a[7]), "r"(b[0]), "r"(b[1]), "r"(b[2]), "r"(b[3]), "r"(b[4]), "r"(b[5]), "r"(b[6]), "r"(b[7]) ); - #elif defined IS_AMD && HAS_VADD == 1 && HAS_VADDC == 1 + #elif (defined IS_AMD || defined IS_HIP) && HAS_VADD == 1 && HAS_VADDC == 1 __asm__ __volatile__ ( "V_ADD_U32 %0, %9, %17;" diff --git a/OpenCL/inc_rp_optimized.cl b/OpenCL/inc_rp_optimized.cl index dc3754907..026198f09 100644 --- a/OpenCL/inc_rp_optimized.cl +++ b/OpenCL/inc_rp_optimized.cl @@ -781,7 +781,7 @@ DECLSPEC void append_block8_optimized (const u32 offset, u32 *buf0, u32 *buf1, c const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC const u32 src_r00 = src_r0[0]; const u32 src_r01 = src_r0[1]; const u32 src_r02 = src_r0[2]; @@ -884,7 +884,7 @@ DECLSPEC void append_block8_optimized (const u32 offset, u32 *buf0, u32 *buf1, c } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; @@ -894,7 +894,7 @@ DECLSPEC void append_block8_optimized (const u32 offset, u32 *buf0, u32 *buf1, c const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); #endif @@ -1359,11 +1359,7 @@ DECLSPEC u32 rule_op_mangle_delete_at (MAYBE_UNUSED const u32 p0, 
MAYBE_UNUSED c const u32 ml = (1 << ((p0 & 3) * 8)) - 1; const u32 mr = ~ml; - #ifdef IS_AMD const int p0_switch = p0 / 4; - #else - const int p0_switch = p0 / 4; - #endif switch (p0_switch) { @@ -1466,11 +1462,7 @@ DECLSPEC u32 rule_op_mangle_omit (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const const u32 ml = (1 << ((p0 & 3) * 8)) - 1; const u32 mr = ~ml; - #ifdef IS_AMD const int p0_switch = p0 / 4; - #else - const int p0_switch = p0 / 4; - #endif switch (p0_switch) { @@ -1552,11 +1544,7 @@ DECLSPEC u32 rule_op_mangle_insert (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED cons const u32 mr = 0xffffff00 << ((p0 & 3) * 8); - #ifdef IS_AMD const int p0_switch = p0 / 4; - #else - const int p0_switch = p0 / 4; - #endif switch (p0_switch) { diff --git a/OpenCL/inc_vendor.h b/OpenCL/inc_vendor.h index d98a85053..a94bbefd4 100644 --- a/OpenCL/inc_vendor.h +++ b/OpenCL/inc_vendor.h @@ -96,7 +96,7 @@ #elif VENDOR_ID == (1 << 8) #define IS_AMD_USE_HIP // TODO HIP optimization potential -#define IS_GENERIC +//#define IS_GENERIC #else #define IS_GENERIC #endif diff --git a/OpenCL/m00500-optimized.cl b/OpenCL/m00500-optimized.cl index 6ea000442..19f7153ff 100644 --- a/OpenCL/m00500-optimized.cl +++ b/OpenCL/m00500-optimized.cl @@ -32,7 +32,7 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons u32 tmp3; u32 tmp4; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; u32 in2 = append[2]; @@ -139,7 +139,7 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, u32 tmp3; u32 tmp4; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; u32 in2 = append[2]; @@ -246,7 +246,7 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const u32 tmp1; u32 tmp2; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || 
defined IS_HIP) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; diff --git a/OpenCL/m01500_a3-pure.cl b/OpenCL/m01500_a3-pure.cl index f7a8ad45c..c2c4245e1 100644 --- a/OpenCL/m01500_a3-pure.cl +++ b/OpenCL/m01500_a3-pure.cl @@ -19,7 +19,7 @@ #define KXX_DECL #endif -#ifdef IS_AMD +#if (defined IS_AMD || defined IS_HIP) #define KXX_DECL #endif @@ -896,7 +896,7 @@ DECLSPEC void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const #endif #endif -#if defined IS_AMD || defined IS_GENERIC +#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC /* * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC diff --git a/OpenCL/m01600-optimized.cl b/OpenCL/m01600-optimized.cl index 6489a04b8..cfaad44cc 100644 --- a/OpenCL/m01600-optimized.cl +++ b/OpenCL/m01600-optimized.cl @@ -31,7 +31,7 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons u32 tmp3; u32 tmp4; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; u32 in2 = append[2]; @@ -138,7 +138,7 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, u32 tmp3; u32 tmp4; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; u32 in2 = append[2]; @@ -245,7 +245,7 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const u32 tmp1; u32 tmp2; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; diff --git a/OpenCL/m03000_a3-pure.cl b/OpenCL/m03000_a3-pure.cl index a44b6f065..65beaabda 100644 --- a/OpenCL/m03000_a3-pure.cl +++ b/OpenCL/m03000_a3-pure.cl @@ -19,7 +19,7 @@ #define KXX_DECL #endif -#ifdef IS_AMD +#if (defined IS_AMD || defined IS_HIP) #define KXX_DECL #endif @@ -896,7 +896,7 @@ DECLSPEC void s8 (const u32 
a1, const u32 a2, const u32 a3, const u32 a4, const #endif #endif -#if defined IS_AMD || defined IS_GENERIC +#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC /* * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC diff --git a/OpenCL/m05800-optimized.cl b/OpenCL/m05800-optimized.cl index b247b05e0..38099159f 100644 --- a/OpenCL/m05800-optimized.cl +++ b/OpenCL/m05800-optimized.cl @@ -2119,7 +2119,7 @@ DECLSPEC void append_salt (u32 *w0, u32 *w1, u32 *w2, const u32 *append, const u u32 tmp4; u32 tmp5; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; u32 in2 = append[2]; diff --git a/OpenCL/m06300-optimized.cl b/OpenCL/m06300-optimized.cl index c3d320c95..b7c9ddddd 100644 --- a/OpenCL/m06300-optimized.cl +++ b/OpenCL/m06300-optimized.cl @@ -28,7 +28,7 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons u32 tmp3; u32 tmp4; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; u32 in2 = append[2]; @@ -135,7 +135,7 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, u32 tmp3; u32 tmp4; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; u32 in2 = append[2]; @@ -242,7 +242,7 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const u32 tmp1; u32 tmp2; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; diff --git a/OpenCL/m07400-optimized.cl b/OpenCL/m07400-optimized.cl index df1f3478f..7efa5c94e 100644 --- a/OpenCL/m07400-optimized.cl +++ b/OpenCL/m07400-optimized.cl @@ -45,7 +45,7 @@ DECLSPEC u32 memcat16 (u32 *block, const u32 offset, const u32 *append, const u3 u32 in2 = 
append[2]; u32 in3 = append[3]; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC const u32 tmp0 = hc_bytealign_be ( 0, in0, offset); const u32 tmp1 = hc_bytealign_be (in0, in1, offset); const u32 tmp2 = hc_bytealign_be (in1, in2, offset); @@ -165,7 +165,7 @@ DECLSPEC u32 memcat16c (u32 *block, const u32 offset, const u32 *append, const u u32 in2 = append[2]; u32 in3 = append[3]; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC const u32 tmp0 = hc_bytealign_be ( 0, in0, offset); const u32 tmp1 = hc_bytealign_be (in0, in1, offset); const u32 tmp2 = hc_bytealign_be (in1, in2, offset); @@ -322,7 +322,7 @@ DECLSPEC u32 memcat16s (u32 *block, const u32 offset, const u32 *append, const u u32 in3 = append[3]; u32 in4 = append[4]; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC const u32 tmp0 = hc_bytealign_be ( 0, in0, offset); const u32 tmp1 = hc_bytealign_be (in0, in1, offset); const u32 tmp2 = hc_bytealign_be (in1, in2, offset); @@ -456,7 +456,7 @@ DECLSPEC u32 memcat16sc (u32 *block, const u32 offset, const u32 *append, const u32 in3 = append[3]; u32 in4 = append[4]; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC const u32 tmp0 = hc_bytealign_be ( 0, in0, offset); const u32 tmp1 = hc_bytealign_be (in0, in1, offset); const u32 tmp2 = hc_bytealign_be (in1, in2, offset); @@ -756,7 +756,7 @@ DECLSPEC u32 memcat20 (u32 *block, const u32 offset, const u32 *append, const u3 u32 in2 = append[2]; u32 in3 = append[3]; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset); const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset); const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset); @@ -915,7 +915,7 @@ DECLSPEC u32 memcat20_x80 (u32 *block, const u32 
offset, const u32 *append, cons u32 in3 = append[3]; u32 in4 = 0x80000000; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset); const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset); const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset); @@ -1074,7 +1074,7 @@ DECLSPEC u32 memcat24 (u32 *block, const u32 offset, const u32 *append, const u3 u32 in3 = append[3]; u32 in4 = append[4]; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset); const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset); const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset); diff --git a/OpenCL/m07700_a0-optimized.cl b/OpenCL/m07700_a0-optimized.cl index 81a69e83e..165691e6d 100644 --- a/OpenCL/m07700_a0-optimized.cl +++ b/OpenCL/m07700_a0-optimized.cl @@ -17,13 +17,15 @@ #include "inc_hash_md5.cl" #endif +/* #ifdef IS_AMD #define GETCHAR(a,p) (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff) #define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8)))) #else +*/ + #define GETCHAR(a,p) ((u8 *)(a))[(p)] #define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c) -#endif #define SETSHIFTEDINT(a,n,v) \ { \ diff --git a/OpenCL/m07700_a1-optimized.cl b/OpenCL/m07700_a1-optimized.cl index 9431d66a6..77a3bb26a 100644 --- a/OpenCL/m07700_a1-optimized.cl +++ b/OpenCL/m07700_a1-optimized.cl @@ -15,13 +15,8 @@ #include "inc_hash_md5.cl" #endif -#ifdef IS_AMD -#define GETCHAR(a,p) (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff) -#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8)))) -#else #define GETCHAR(a,p) ((u8 *)(a))[(p)] #define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c) -#endif #define SETSHIFTEDINT(a,n,v) \ { \ diff --git a/OpenCL/m07700_a3-optimized.cl b/OpenCL/m07700_a3-optimized.cl index 
53dbb1fe2..e867cb070 100644 --- a/OpenCL/m07700_a3-optimized.cl +++ b/OpenCL/m07700_a3-optimized.cl @@ -15,13 +15,8 @@ #include "inc_hash_md5.cl" #endif -#ifdef IS_AMD -#define GETCHAR(a,p) (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff) -#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8)))) -#else #define GETCHAR(a,p) ((u8 *)(a))[(p)] #define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c) -#endif CONSTANT_VK u32a sapb_trans_tbl[256] = { diff --git a/OpenCL/m07701_a0-optimized.cl b/OpenCL/m07701_a0-optimized.cl index b530785a7..55430df32 100644 --- a/OpenCL/m07701_a0-optimized.cl +++ b/OpenCL/m07701_a0-optimized.cl @@ -17,13 +17,8 @@ #include "inc_hash_md5.cl" #endif -#ifdef IS_AMD -#define GETCHAR(a,p) (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff) -#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8)))) -#else #define GETCHAR(a,p) ((u8 *)(a))[(p)] #define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c) -#endif #define SETSHIFTEDINT(a,n,v) \ { \ diff --git a/OpenCL/m07701_a1-optimized.cl b/OpenCL/m07701_a1-optimized.cl index e1ae00412..425bb3a04 100644 --- a/OpenCL/m07701_a1-optimized.cl +++ b/OpenCL/m07701_a1-optimized.cl @@ -15,13 +15,8 @@ #include "inc_hash_md5.cl" #endif -#ifdef IS_AMD -#define GETCHAR(a,p) (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff) -#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8)))) -#else #define GETCHAR(a,p) ((u8 *)(a))[(p)] #define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c) -#endif #define SETSHIFTEDINT(a,n,v) \ { \ diff --git a/OpenCL/m07701_a3-optimized.cl b/OpenCL/m07701_a3-optimized.cl index ae6762e90..934c943f0 100644 --- a/OpenCL/m07701_a3-optimized.cl +++ b/OpenCL/m07701_a3-optimized.cl @@ -15,13 +15,8 @@ #include "inc_hash_md5.cl" #endif -#ifdef IS_AMD -#define GETCHAR(a,p) (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff) -#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & 
~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8)))) -#else #define GETCHAR(a,p) ((u8 *)(a))[(p)] #define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c) -#endif CONSTANT_VK u32a sapb_trans_tbl[256] = { diff --git a/OpenCL/m10700-optimized.cl b/OpenCL/m10700-optimized.cl index bf311a22e..a9b50a6ac 100644 --- a/OpenCL/m10700-optimized.cl +++ b/OpenCL/m10700-optimized.cl @@ -232,7 +232,7 @@ DECLSPEC void make_sc (u32 *sc, const u32 *pw, const u32 pw_len, const u32 *bl, u32 i; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC for (i = 0; i < pd; i++) sc[idx++] = pw[i]; sc[idx++] = pw[i] | hc_bytealign_be (bl[0], 0, pm4); @@ -263,7 +263,7 @@ DECLSPEC void make_pt_with_offset (u32 *pt, const u32 offset, const u32 *sc, con const u32 om = m % 4; const u32 od = m / 4; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC pt[0] = hc_bytealign_be (sc[od + 1], sc[od + 0], om); pt[1] = hc_bytealign_be (sc[od + 2], sc[od + 1], om); pt[2] = hc_bytealign_be (sc[od + 3], sc[od + 2], om); diff --git a/OpenCL/m11600-pure.cl b/OpenCL/m11600-pure.cl index 469365ae6..be42e185b 100644 --- a/OpenCL/m11600-pure.cl +++ b/OpenCL/m11600-pure.cl @@ -42,7 +42,7 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co u32 tmp0; u32 tmp1; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC tmp0 = hc_bytealign_be (0, append, func_len); tmp1 = hc_bytealign_be (append, 0, func_len); #endif diff --git a/OpenCL/m12500-pure.cl b/OpenCL/m12500-pure.cl index ce1b2cb53..f8ed47771 100644 --- a/OpenCL/m12500-pure.cl +++ b/OpenCL/m12500-pure.cl @@ -37,7 +37,7 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co u32 tmp0; u32 tmp1; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC tmp0 = hc_bytealign_be (0, append, func_len); tmp1 = 
hc_bytealign_be (append, 0, func_len); #endif diff --git a/OpenCL/m13800_a0-optimized.cl b/OpenCL/m13800_a0-optimized.cl index a036044b0..6758ffbd4 100644 --- a/OpenCL/m13800_a0-optimized.cl +++ b/OpenCL/m13800_a0-optimized.cl @@ -51,7 +51,7 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry) u32x tmp15; u32x tmp16; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC tmp00 = hc_bytealign_be ( 0, carry[ 0], offset); tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset); tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset); diff --git a/OpenCL/m13800_a1-optimized.cl b/OpenCL/m13800_a1-optimized.cl index 3b462466e..85e711b94 100644 --- a/OpenCL/m13800_a1-optimized.cl +++ b/OpenCL/m13800_a1-optimized.cl @@ -49,7 +49,7 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry) u32x tmp15; u32x tmp16; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC tmp00 = hc_bytealign_be ( 0, carry[ 0], offset); tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset); tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset); diff --git a/OpenCL/m13800_a3-optimized.cl b/OpenCL/m13800_a3-optimized.cl index 9ad06a344..65b759de0 100644 --- a/OpenCL/m13800_a3-optimized.cl +++ b/OpenCL/m13800_a3-optimized.cl @@ -48,7 +48,7 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry) u32x tmp15; u32x tmp16; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC tmp00 = hc_bytealign_be ( 0, carry[ 0], offset); tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset); tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset); diff --git a/OpenCL/m14000_a3-pure.cl b/OpenCL/m14000_a3-pure.cl index f44e0bbfc..84b419923 100644 --- a/OpenCL/m14000_a3-pure.cl +++ b/OpenCL/m14000_a3-pure.cl @@ -19,7 +19,7 @@ #define KXX_DECL #endif -#ifdef IS_AMD +#if (defined IS_AMD || defined IS_HIP) #define 
KXX_DECL #endif @@ -896,7 +896,7 @@ DECLSPEC void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const #endif #endif -#if defined IS_AMD || defined IS_GENERIC +#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC /* * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC diff --git a/OpenCL/m23700-pure.cl b/OpenCL/m23700-pure.cl index ec62394a1..af287574e 100644 --- a/OpenCL/m23700-pure.cl +++ b/OpenCL/m23700-pure.cl @@ -145,7 +145,7 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co u32 tmp0; u32 tmp1; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC tmp0 = hc_bytealign_be (0, append, func_len); tmp1 = hc_bytealign_be (append, 0, func_len); #endif diff --git a/OpenCL/m23800-pure.cl b/OpenCL/m23800-pure.cl index 1629433c0..f6d345677 100644 --- a/OpenCL/m23800-pure.cl +++ b/OpenCL/m23800-pure.cl @@ -56,7 +56,7 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co u32 tmp0; u32 tmp1; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC tmp0 = hc_bytealign_be (0, append, func_len); tmp1 = hc_bytealign_be (append, 0, func_len); #endif diff --git a/src/backend.c b/src/backend.c index d63ee3a1d..53de2d525 100644 --- a/src/backend.c +++ b/src/backend.c @@ -8339,17 +8339,17 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) device_param->has_mov64 = false; device_param->has_prmt = false; - device_param->has_vadd = false; - device_param->has_vaddc = false; - device_param->has_vadd_co = false; - device_param->has_vaddc_co = false; - device_param->has_vsub = false; - device_param->has_vsubb = false; - device_param->has_vsub_co = false; - device_param->has_vsubb_co = false; - device_param->has_vadd3 = false; - device_param->has_vbfe = false; - device_param->has_vperm = false; + device_param->has_vadd = true; + device_param->has_vaddc = true; + 
device_param->has_vadd_co = true; + device_param->has_vaddc_co = true; + device_param->has_vsub = true; + device_param->has_vsubb = true; + device_param->has_vsub_co = true; + device_param->has_vsubb_co = true; + device_param->has_vadd3 = true; + device_param->has_vbfe = true; + device_param->has_vperm = true; // device_available_mem From 80a671eab73ed19115cddc0b6852030a7df67f28 Mon Sep 17 00:00:00 2001 From: Gabriele Gristina Date: Wed, 14 Jul 2021 19:21:21 +0200 Subject: [PATCH 14/22] Fix bug on hm_SYSFS_CPU_get_syspath_hwmon() --- src/ext_sysfs_cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ext_sysfs_cpu.c b/src/ext_sysfs_cpu.c index ddfd5d4d4..532d284a3 100644 --- a/src/ext_sysfs_cpu.c +++ b/src/ext_sysfs_cpu.c @@ -63,7 +63,7 @@ char *hm_SYSFS_CPU_get_syspath_hwmon () if (hc_fopen_raw (&fp, path, "rb") == false) continue; - char buf[16]; + char buf[32] = { 0 }; const size_t line_len = fgetl (&fp, buf, sizeof (buf)); From d130cc66b3f09bdd12cfa940e7a8f3a388bfc937 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Thu, 15 Jul 2021 09:57:41 +0200 Subject: [PATCH 15/22] Optimize ISA code on HIP for V_ALIGNBIT_B32 using a different template for inline assembly --- OpenCL/inc_platform.cl | 2 +- src/modules/module_08200.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/OpenCL/inc_platform.cl b/OpenCL/inc_platform.cl index 806a403e4..3f1336f92 100644 --- a/OpenCL/inc_platform.cl +++ b/OpenCL/inc_platform.cl @@ -253,7 +253,7 @@ DECLSPEC u32 amd_bitalign_S (const u32 a, const u32 b, const int n) { u32 r = 0; - __asm__ ("V_ALIGNBIT_B32 %0, %1, %2, %3;" : "=v"(r): "v"(a), "v"(b), "v"(n)); + asm ("V_ALIGNBIT_B32 %0, %1, %2, %3;" : "=v"(r): "v"(a), "v"(b), "I"(n)); return r; } diff --git a/src/modules/module_08200.c b/src/modules/module_08200.c index c092919a3..617b80972 100644 --- a/src/modules/module_08200.c +++ b/src/modules/module_08200.c @@ -20,6 +20,7 @@ static const u32 HASH_CATEGORY = HASH_CATEGORY_PASSWORD_MANAGER; static 
const char *HASH_NAME = "1Password, cloudkeychain"; static const u64 KERN_TYPE = 8200; static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE + | OPTI_TYPE_USES_BITS_64 | OPTI_TYPE_SLOW_HASH_SIMD_LOOP; static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE; static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; From 4730cf6e79709912882636d77e0867b0be954f0e Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Thu, 15 Jul 2021 16:42:29 +0200 Subject: [PATCH 16/22] WinZip Kernel: Increase supported data length from 8k to 16mb --- OpenCL/m13600-pure.cl | 2 +- docs/changes.txt | 1 + src/modules/module_13600.c | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/OpenCL/m13600-pure.cl b/OpenCL/m13600-pure.cl index 1e0ff6d1c..0202cf0bf 100644 --- a/OpenCL/m13600-pure.cl +++ b/OpenCL/m13600-pure.cl @@ -37,7 +37,7 @@ typedef struct zip2 u32 verify_bytes; u32 compress_length; u32 data_len; - u32 data_buf[2048]; + u32 data_buf[0x4000000]; u32 auth_len; u32 auth_buf[4]; diff --git a/docs/changes.txt b/docs/changes.txt index d9e895e53..28876cd93 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -63,6 +63,7 @@ - OpenCL Runtime: Workaround JiT crash (SC failed. No reason given.) 
on macOS by limiting local memory allocations to 32k - Status View: Include time and duration info when pausing and resuming - Tests: Changed tests for VeraCrypt from -a 0 to -a 3, because password extension is not available to all shells +- WinZip Kernel: Increase supported data length from 8k to 16mb * changes v6.2.1 -> v6.2.2 diff --git a/src/modules/module_13600.c b/src/modules/module_13600.c index a7d675138..ec1c4e6ef 100644 --- a/src/modules/module_13600.c +++ b/src/modules/module_13600.c @@ -61,7 +61,7 @@ typedef struct zip2 u32 verify_bytes; u32 compress_length; u32 data_len; - u32 data_buf[2048]; + u32 data_buf[0x4000000]; u32 auth_len; u32 auth_buf[4]; @@ -166,7 +166,7 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE | TOKEN_ATTR_VERIFY_HEX; token.len_min[7] = 0; - token.len_max[7] = 16384; + token.len_max[7] = 0x4000000 * 4 * 2; token.sep[7] = '*'; token.attr[7] = TOKEN_ATTR_VERIFY_LENGTH | TOKEN_ATTR_VERIFY_HEX; From 2e929e692e9a99a5a1faef1eaaef11a7fbd617bb Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Thu, 15 Jul 2021 21:26:07 +0200 Subject: [PATCH 17/22] Backport update module_unstable_warning() for -m 21800 on HIP --- src/modules/module_21800.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/modules/module_21800.c b/src/modules/module_21800.c index 9ae25490d..a3f3a00c2 100644 --- a/src/modules/module_21800.c +++ b/src/modules/module_21800.c @@ -96,6 +96,11 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE } } + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + return true; + } + // amdgpu-pro-20.50-1234664-ubuntu-20.04 (rocr) // test_1620713931/test_report.log:! unhandled return code 255, cmdline : cat test_1620713931/21800_passwords.txt | ./hashcat --quiet --potfile-disable --runtime 400 --hwmon-disable -O -D 2 --backend-vector-width 4 -a 0 -m 21800 test_1620713931/21800_hashes.txt // test_1620719578/test_report.log:! 
unhandled return code 255, cmdline : cat test_1620719578/21800_passwords.txt | ./hashcat --quiet --potfile-disable --runtime 400 --hwmon-disable -D 2 --backend-vector-width 4 -a 0 -m 21800 test_1620719578/21800_hashes.txt From 45e65dd05a9e3fb14baca4f5d39f019d86231783 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Thu, 15 Jul 2021 23:34:27 +0200 Subject: [PATCH 18/22] Backport more ROCm based optimizations to HIP --- OpenCL/inc_platform.cl | 2 +- OpenCL/inc_vendor.h | 8 ++------ OpenCL/m01700_a0-optimized.cl | 2 +- OpenCL/m01700_a1-optimized.cl | 2 +- OpenCL/m01700_a3-optimized.cl | 2 +- OpenCL/m01710_a0-optimized.cl | 2 +- OpenCL/m01710_a1-optimized.cl | 2 +- OpenCL/m01710_a3-optimized.cl | 2 +- OpenCL/m01720_a0-optimized.cl | 2 +- OpenCL/m01720_a1-optimized.cl | 2 +- OpenCL/m01720_a3-optimized.cl | 2 +- OpenCL/m01730_a0-optimized.cl | 2 +- OpenCL/m01730_a1-optimized.cl | 2 +- OpenCL/m01730_a3-optimized.cl | 2 +- OpenCL/m01740_a0-optimized.cl | 2 +- OpenCL/m01740_a1-optimized.cl | 2 +- OpenCL/m01740_a3-optimized.cl | 2 +- OpenCL/m08000_a0-optimized.cl | 4 ++-- OpenCL/m08000_a1-optimized.cl | 4 ++-- OpenCL/m08000_a3-optimized.cl | 4 ++-- OpenCL/m10800_a0-optimized.cl | 2 +- OpenCL/m10800_a1-optimized.cl | 2 +- OpenCL/m10800_a3-optimized.cl | 2 +- OpenCL/m21000_a0-optimized.cl | 2 +- OpenCL/m21000_a1-optimized.cl | 4 ++-- OpenCL/m21000_a3-optimized.cl | 4 ++-- OpenCL/m22200_a0-optimized.cl | 2 +- OpenCL/m22200_a1-optimized.cl | 2 +- OpenCL/m22200_a3-optimized.cl | 2 +- src/backend.c | 37 ++++++++++++++++++++--------------- tools/benchmark_deep.pl | 2 +- 31 files changed, 57 insertions(+), 56 deletions(-) diff --git a/OpenCL/inc_platform.cl b/OpenCL/inc_platform.cl index 3f1336f92..8ccb034aa 100644 --- a/OpenCL/inc_platform.cl +++ b/OpenCL/inc_platform.cl @@ -253,7 +253,7 @@ DECLSPEC u32 amd_bitalign_S (const u32 a, const u32 b, const int n) { u32 r = 0; - asm ("V_ALIGNBIT_B32 %0, %1, %2, %3;" : "=v"(r): "v"(a), "v"(b), "I"(n)); + __asm__ ("V_ALIGNBIT_B32 %0, %1, 
%2, %3;" : "=v"(r): "v"(a), "v"(b), "I"(n)); return r; } diff --git a/OpenCL/inc_vendor.h b/OpenCL/inc_vendor.h index a94bbefd4..f4c31f59a 100644 --- a/OpenCL/inc_vendor.h +++ b/OpenCL/inc_vendor.h @@ -95,8 +95,6 @@ #define IS_GENERIC #elif VENDOR_ID == (1 << 8) #define IS_AMD_USE_HIP -// TODO HIP optimization potential -//#define IS_GENERIC #else #define IS_GENERIC #endif @@ -158,10 +156,8 @@ #endif #ifdef IS_HIP -//TODO HIP -//#define USE_BITSELECT -//#define USE_ROTATE -//#define USE_SWIZZLE +#define USE_BITSELECT +#define USE_ROTATE #endif #ifdef IS_ROCM diff --git a/OpenCL/m01700_a0-optimized.cl b/OpenCL/m01700_a0-optimized.cl index 18c7c61ab..dcc4c217e 100644 --- a/OpenCL/m01700_a0-optimized.cl +++ b/OpenCL/m01700_a0-optimized.cl @@ -86,7 +86,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); -#if defined IS_CUDA || defined IS_HIP + #if defined IS_CUDA ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m01700_a1-optimized.cl b/OpenCL/m01700_a1-optimized.cl index 21efdcc46..6ca96c818 100644 --- a/OpenCL/m01700_a1-optimized.cl +++ b/OpenCL/m01700_a1-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #if defined IS_CUDA || defined IS_HIP + #if defined IS_CUDA ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m01700_a3-optimized.cl b/OpenCL/m01700_a3-optimized.cl index 064044263..6444cfae0 100644 --- a/OpenCL/m01700_a3-optimized.cl +++ b/OpenCL/m01700_a3-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #if defined IS_CUDA || defined IS_HIP + #if defined IS_CUDA ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m01710_a0-optimized.cl 
b/OpenCL/m01710_a0-optimized.cl index 4b66b83f5..2c72b062f 100644 --- a/OpenCL/m01710_a0-optimized.cl +++ b/OpenCL/m01710_a0-optimized.cl @@ -86,7 +86,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #if defined IS_CUDA || defined IS_HIP + #if defined IS_CUDA ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m01710_a1-optimized.cl b/OpenCL/m01710_a1-optimized.cl index e7b691334..45111549b 100644 --- a/OpenCL/m01710_a1-optimized.cl +++ b/OpenCL/m01710_a1-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #if defined IS_CUDA || defined IS_HIP + #if defined IS_CUDA ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m01710_a3-optimized.cl b/OpenCL/m01710_a3-optimized.cl index 1e893c967..76a331f7e 100644 --- a/OpenCL/m01710_a3-optimized.cl +++ b/OpenCL/m01710_a3-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #if defined IS_CUDA || defined IS_HIP + #if defined IS_CUDA ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m01720_a0-optimized.cl b/OpenCL/m01720_a0-optimized.cl index 6def5fff2..bfb0bb37e 100644 --- a/OpenCL/m01720_a0-optimized.cl +++ b/OpenCL/m01720_a0-optimized.cl @@ -86,7 +86,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #if defined IS_CUDA || defined IS_HIP + #if defined IS_CUDA ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m01720_a1-optimized.cl b/OpenCL/m01720_a1-optimized.cl index ffe6fe15a..ade402112 100644 --- a/OpenCL/m01720_a1-optimized.cl +++ b/OpenCL/m01720_a1-optimized.cl @@ -84,7 +84,7 @@ 
DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #if defined IS_CUDA || defined IS_HIP + #if defined IS_CUDA ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m01720_a3-optimized.cl b/OpenCL/m01720_a3-optimized.cl index 3fdf675e0..5dfc2b9d2 100644 --- a/OpenCL/m01720_a3-optimized.cl +++ b/OpenCL/m01720_a3-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #if defined IS_CUDA || defined IS_HIP + #if defined IS_CUDA ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m01730_a0-optimized.cl b/OpenCL/m01730_a0-optimized.cl index 45c025215..14a965c4b 100644 --- a/OpenCL/m01730_a0-optimized.cl +++ b/OpenCL/m01730_a0-optimized.cl @@ -86,7 +86,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #if defined IS_CUDA || defined IS_HIP + #if defined IS_CUDA ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m01730_a1-optimized.cl b/OpenCL/m01730_a1-optimized.cl index 03b3e10af..0fe9c945d 100644 --- a/OpenCL/m01730_a1-optimized.cl +++ b/OpenCL/m01730_a1-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #if defined IS_CUDA || defined IS_HIP + #if defined IS_CUDA ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m01730_a3-optimized.cl b/OpenCL/m01730_a3-optimized.cl index b114b8c18..e533b3e95 100644 --- a/OpenCL/m01730_a3-optimized.cl +++ b/OpenCL/m01730_a3-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #if defined IS_CUDA || defined IS_HIP + #if defined IS_CUDA 
ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m01740_a0-optimized.cl b/OpenCL/m01740_a0-optimized.cl index b04db3c82..b9be203f5 100644 --- a/OpenCL/m01740_a0-optimized.cl +++ b/OpenCL/m01740_a0-optimized.cl @@ -86,7 +86,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #if defined IS_CUDA || defined IS_HIP + #if defined IS_CUDA ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m01740_a1-optimized.cl b/OpenCL/m01740_a1-optimized.cl index 2fdb41cde..599cdcbad 100644 --- a/OpenCL/m01740_a1-optimized.cl +++ b/OpenCL/m01740_a1-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #if defined IS_CUDA || defined IS_HIP + #if defined IS_CUDA ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m01740_a3-optimized.cl b/OpenCL/m01740_a3-optimized.cl index 949fa4999..d432b4f4f 100644 --- a/OpenCL/m01740_a3-optimized.cl +++ b/OpenCL/m01740_a3-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #if defined IS_CUDA || defined IS_HIP + #if defined IS_CUDA ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m08000_a0-optimized.cl b/OpenCL/m08000_a0-optimized.cl index 873d4805b..67bdf8d61 100644 --- a/OpenCL/m08000_a0-optimized.cl +++ b/OpenCL/m08000_a0-optimized.cl @@ -86,7 +86,7 @@ DECLSPEC void sha256_transform_m (u32x *digest, const u32x *w) ROUND_STEP (0); - #if defined IS_CUDA || defined IS_HIP + #if defined IS_CUDA ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); @@ -143,7 +143,7 @@ DECLSPEC void sha256_transform_z (u32x *digest) 
ROUND_STEP_Z (0); - #if defined IS_CUDA || defined IS_HIP + #if defined IS_CUDA ROUND_STEP_Z (16); ROUND_STEP_Z (32); ROUND_STEP_Z (48); diff --git a/OpenCL/m08000_a1-optimized.cl b/OpenCL/m08000_a1-optimized.cl index e05eb37f2..01c925243 100644 --- a/OpenCL/m08000_a1-optimized.cl +++ b/OpenCL/m08000_a1-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha256_transform_m (u32x *digest, const u32x *w) ROUND_STEP (0); - #if defined IS_CUDA || defined IS_HIP + #if defined IS_CUDA ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); @@ -141,7 +141,7 @@ DECLSPEC void sha256_transform_z (u32x *digest) ROUND_STEP_Z (0); - #if defined IS_CUDA || defined IS_HIP + #if defined IS_CUDA ROUND_STEP_Z (16); ROUND_STEP_Z (32); ROUND_STEP_Z (48); diff --git a/OpenCL/m08000_a3-optimized.cl b/OpenCL/m08000_a3-optimized.cl index f62608a0e..14ee12d12 100644 --- a/OpenCL/m08000_a3-optimized.cl +++ b/OpenCL/m08000_a3-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha256_transform_m (u32x *digest, const u32x *w) ROUND_STEP (0); - #if defined IS_CUDA || defined IS_HIP + #if defined IS_CUDA ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); @@ -141,7 +141,7 @@ DECLSPEC void sha256_transform_z (u32x *digest) ROUND_STEP_Z (0); - #if defined IS_CUDA || defined IS_HIP + #if defined IS_CUDA ROUND_STEP_Z (16); ROUND_STEP_Z (32); ROUND_STEP_Z (48); diff --git a/OpenCL/m10800_a0-optimized.cl b/OpenCL/m10800_a0-optimized.cl index 7bbb0cd51..26d5ac84f 100644 --- a/OpenCL/m10800_a0-optimized.cl +++ b/OpenCL/m10800_a0-optimized.cl @@ -86,7 +86,7 @@ DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #if defined IS_CUDA || defined IS_HIP + #if defined IS_CUDA ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m10800_a1-optimized.cl b/OpenCL/m10800_a1-optimized.cl index ff3014167..f09627684 100644 
--- a/OpenCL/m10800_a1-optimized.cl +++ b/OpenCL/m10800_a1-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #if defined IS_CUDA || defined IS_HIP + #if defined IS_CUDA ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m10800_a3-optimized.cl b/OpenCL/m10800_a3-optimized.cl index 031ae5100..eb3a08a41 100644 --- a/OpenCL/m10800_a3-optimized.cl +++ b/OpenCL/m10800_a3-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #if defined IS_CUDA || defined IS_HIP + #if defined IS_CUDA ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m21000_a0-optimized.cl b/OpenCL/m21000_a0-optimized.cl index 7b782c877..d0f88c06a 100644 --- a/OpenCL/m21000_a0-optimized.cl +++ b/OpenCL/m21000_a0-optimized.cl @@ -89,7 +89,7 @@ DECLSPEC void sha512_transform_opt (const u32x *w0, const u32x *w1, const u32x * ROUND_STEP (0); - #if defined IS_CUDA || defined IS_HIP + #if defined IS_CUDA ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m21000_a1-optimized.cl b/OpenCL/m21000_a1-optimized.cl index ba792b588..f9110176d 100644 --- a/OpenCL/m21000_a1-optimized.cl +++ b/OpenCL/m21000_a1-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_full (const u32x *w0, const u32x *w1, const u32x ROUND_STEP (0); - #if defined IS_CUDA || defined IS_HIP + #if defined IS_CUDA ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); @@ -182,7 +182,7 @@ DECLSPEC void sha512_transform_opt (const u32x *w0, const u32x *w1, const u32x * ROUND_STEP (0); - #if defined IS_CUDA || defined IS_HIP + #if defined IS_CUDA ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); 
ROUND_STEP (48); diff --git a/OpenCL/m21000_a3-optimized.cl b/OpenCL/m21000_a3-optimized.cl index f03742d40..cdb3f0038 100644 --- a/OpenCL/m21000_a3-optimized.cl +++ b/OpenCL/m21000_a3-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_full (const u32x *w0, const u32x *w1, const u32x ROUND_STEP (0); - #if defined IS_CUDA || defined IS_HIP + #if defined IS_CUDA ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); @@ -182,7 +182,7 @@ DECLSPEC void sha512_transform_opt (const u32x *w0, const u32x *w1, const u32x * ROUND_STEP (0); - #if defined IS_CUDA || defined IS_HIP + #if defined IS_CUDA ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m22200_a0-optimized.cl b/OpenCL/m22200_a0-optimized.cl index cafa7af7c..150380d44 100644 --- a/OpenCL/m22200_a0-optimized.cl +++ b/OpenCL/m22200_a0-optimized.cl @@ -86,7 +86,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #if defined IS_CUDA || defined IS_HIP + #if defined IS_CUDA ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m22200_a1-optimized.cl b/OpenCL/m22200_a1-optimized.cl index fb40d5406..01d192b99 100644 --- a/OpenCL/m22200_a1-optimized.cl +++ b/OpenCL/m22200_a1-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #if defined IS_CUDA || defined IS_HIP + #if defined IS_CUDA ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/OpenCL/m22200_a3-optimized.cl b/OpenCL/m22200_a3-optimized.cl index 211522e7f..48cf61584 100644 --- a/OpenCL/m22200_a3-optimized.cl +++ b/OpenCL/m22200_a3-optimized.cl @@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32 ROUND_STEP (0); - #if defined IS_CUDA || 
defined IS_HIP + #if defined IS_CUDA ROUND_EXPAND (); ROUND_STEP (16); ROUND_EXPAND (); ROUND_STEP (32); ROUND_EXPAND (); ROUND_STEP (48); diff --git a/src/backend.c b/src/backend.c index 53de2d525..4caff74ce 100644 --- a/src/backend.c +++ b/src/backend.c @@ -8339,18 +8339,6 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) device_param->has_mov64 = false; device_param->has_prmt = false; - device_param->has_vadd = true; - device_param->has_vaddc = true; - device_param->has_vadd_co = true; - device_param->has_vaddc_co = true; - device_param->has_vsub = true; - device_param->has_vsubb = true; - device_param->has_vsub_co = true; - device_param->has_vsubb_co = true; - device_param->has_vadd3 = true; - device_param->has_vbfe = true; - device_param->has_vperm = true; - // device_available_mem HIPcontext hip_context; @@ -9528,7 +9516,27 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) if (backend_ctx->hip) { - // TODO HIP + // TODO HIP? + // Maybe all devices supported by hip have these instructions guaranteed? 
+ + for (int backend_devices_cnt = 0; backend_devices_cnt < backend_ctx->backend_devices_cnt; backend_devices_cnt++) + { + hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_cnt]; + + if (device_param->is_hip == false) continue; + + device_param->has_vadd = true; + device_param->has_vaddc = true; + device_param->has_vadd_co = true; + device_param->has_vaddc_co = true; + device_param->has_vsub = true; + device_param->has_vsubb = true; + device_param->has_vsub_co = true; + device_param->has_vsubb_co = true; + device_param->has_vadd3 = true; + device_param->has_vbfe = true; + device_param->has_vperm = true; + } } if (backend_ctx->ocl) @@ -10495,9 +10503,6 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p //hc_asprintf (&hiprtc_options[3], "compute_%d%d", device_param->sm_major, device_param->sm_minor); - // TODO HIP - // no -offload-arch= aka --gpu-architecture because hiprtc gets native arch from hip_context - hiprtc_options[0] = "--gpu-max-threads-per-block=64"; hiprtc_options[1] = ""; hiprtc_options[2] = ""; diff --git a/tools/benchmark_deep.pl b/tools/benchmark_deep.pl index fc7efad2c..df6777441 100755 --- a/tools/benchmark_deep.pl +++ b/tools/benchmark_deep.pl @@ -13,7 +13,7 @@ my $amd_cache = "~/.AMD"; my $hashcat_path = "."; my $kernels_cache = "$hashcat_path/kernels"; my $hashcat_bin = "$hashcat_path/hashcat"; -my $device = 3; +my $device = 1; my $workload_profile = 3; my $runtime = 24; my $sleep_sec = 12; From b25b7c25061416d613094d9ceb18696a9a1ceaa8 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Fri, 16 Jul 2021 17:55:12 +0200 Subject: [PATCH 19/22] Add hashcat.hctune entries for scrypt based hash-modes for AMD MI100 --- hashcat.hctune | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/hashcat.hctune b/hashcat.hctune index 077e42e85..b11441ba2 100644 --- a/hashcat.hctune +++ b/hashcat.hctune @@ -274,6 +274,12 @@ GeForce_RTX_3070 ALIAS_nv_sm50_or_higher 
GeForce_RTX_3080 ALIAS_nv_sm50_or_higher GeForce_RTX_3090 ALIAS_nv_sm50_or_higher +## +## Unmapped GPU +## + +Device_738c ALIAS_AMD_MI100 + ############# ## ENTRIES ## ############# @@ -498,14 +504,20 @@ GeForce_RTX_2080_Ti * 9300 1 532 GeForce_RTX_2080_Ti * 15700 1 68 A GeForce_RTX_2080_Ti * 22700 1 68 A +## 4GB +AMD_Radeon_(TM)_RX_480_Graphics * 8900 1 14 A +AMD_Radeon_(TM)_RX_480_Graphics * 9300 1 126 A +AMD_Radeon_(TM)_RX_480_Graphics * 15700 1 14 A +AMD_Radeon_(TM)_RX_480_Graphics * 22700 1 14 A + ## 8GB Vega_10_XL/XT_[Radeon_RX_Vega_56/64] * 8900 1 28 A Vega_10_XL/XT_[Radeon_RX_Vega_56/64] * 9300 1 442 A Vega_10_XL/XT_[Radeon_RX_Vega_56/64] * 15700 1 28 A Vega_10_XL/XT_[Radeon_RX_Vega_56/64] * 22700 1 28 A -## 4GB -AMD_Radeon_(TM)_RX_480_Graphics * 8900 1 14 A -AMD_Radeon_(TM)_RX_480_Graphics * 9300 1 126 A -AMD_Radeon_(TM)_RX_480_Graphics * 15700 1 14 A -AMD_Radeon_(TM)_RX_480_Graphics * 22700 1 14 A +## 32GB +ALIAS_AMD_MI100 * 8900 1 76 A +ALIAS_AMD_MI100 * 9300 1 63 A +ALIAS_AMD_MI100 * 15700 1 76 A +ALIAS_AMD_MI100 * 22700 1 76 A From 5b9823633bc5cc89045606b0eff4c6ebce86e5c4 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Fri, 16 Jul 2021 22:37:29 +0200 Subject: [PATCH 20/22] Add missing entry about HIP to docs/changes.txt --- docs/changes.txt | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/changes.txt b/docs/changes.txt index 28876cd93..c780d9b28 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -4,19 +4,20 @@ ## Algorithms ## -- Added hash-mode: VeraCrypt PBKDF2-HMAC-Streebog-512 + XTS 512 bit + boot-mode -- Added hash-mode: VeraCrypt PBKDF2-HMAC-Streebog-512 + XTS 1024 bit + boot-mode -- Added hash-mode: VeraCrypt PBKDF2-HMAC-Streebog-512 + XTS 1536 bit + boot-mode - Added hash-mode: AES-128-ECB NOKDF (PT = $salt, key = $pass) - Added hash-mode: AES-192-ECB NOKDF (PT = $salt, key = $pass) - Added hash-mode: AES-256-ECB NOKDF (PT = $salt, key = $pass) - Added hash-mode: iPhone passcode (UID key + System Keybag) - 
Added hash-mode: MetaMask Wallet +- Added hash-mode: VeraCrypt PBKDF2-HMAC-Streebog-512 + XTS 512 bit + boot-mode +- Added hash-mode: VeraCrypt PBKDF2-HMAC-Streebog-512 + XTS 1024 bit + boot-mode +- Added hash-mode: VeraCrypt PBKDF2-HMAC-Streebog-512 + XTS 1536 bit + boot-mode ## ## Features ## +- Added new backend support for HIP, an OpenCL alternative API for AMD GPUs (similar to CUDA for NVIDIA GPUs) - Added option --markov-inverse to inverse markov statistics, with the idea of reversing the order of the password candidates - Added temperature watchdog and fanspeed readings for CPU and GPU on macOS using iokit - Added temperature watchdog and utilization for CPU on linux using sysfs and procfs @@ -41,13 +42,13 @@ - Alias Devices: Show a warning in case the user specifically listed a device to use which in a later step is skipped because it is an alias of another active device - Backend Information: Added new column showing the PCI Address per CUDA/OpenCL device to easier identify broken cards +- Bcrypt-SHA1/MD5 Kernels: Get rid of local memory use for binary to hex conversion to avoid false negatives on several OpenCL runtimes - CPU Affinity: Allow mask up to 64 processors in Windows and remove call to SetThreadAffinityMask as SetProcessAffinityMask limits all threads in process - Debug Rules: HEX-ify rules debug non-rules outputs that contain colons - KeePass: Increase supported size for KeePass 1 databases from 300kB to 16MB - Potfile: Disable potfile for hash-mode 99999 - VeraCrypt: Increase password length support for non-boot volumes from 64 to 128 - WPA Kernels: Increased performance by 3.5% for backend devices controlled by CUDA backend -- Bcrypt-SHA1/MD5 Kernels: Get rid of local memory use for binary to hex conversion to avoid false negatives on several OpenCL runtimes ## ## Technical From 84082a952e4ebbd726bb853f4c35d52641e669ea Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Fri, 16 Jul 2021 22:41:52 +0200 Subject: [PATCH 21/22] hashcat 6.2.3 --- 
docs/changes.txt | 2 +- docs/readme.txt | 2 +- extra/tab_completion/hashcat.sh | 2 +- src/Makefile | 4 ++-- tools/package_bin.sh | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/changes.txt b/docs/changes.txt index c780d9b28..2de32a006 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -1,4 +1,4 @@ -* changes v6.2.2 -> v6.2.x +* changes v6.2.2 -> v6.2.3 ## ## Algorithms diff --git a/docs/readme.txt b/docs/readme.txt index 07605c0cc..db616273c 100644 --- a/docs/readme.txt +++ b/docs/readme.txt @@ -7,7 +7,7 @@ |_____| /___________/ |_____| /_____/ /_______| : : : -hashcat v6.2.2 +hashcat v6.2.3 ============== AMD GPUs on Linux require "RadeonOpenCompute (ROCm)" Software Platform (3.1 or later) diff --git a/extra/tab_completion/hashcat.sh b/extra/tab_completion/hashcat.sh index a1bd0842c..54440e636 100644 --- a/extra/tab_completion/hashcat.sh +++ b/extra/tab_completion/hashcat.sh @@ -411,7 +411,7 @@ _hashcat_contains () _hashcat () { - local VERSION=6.2.2 + local VERSION=6.2.3 local ATTACK_MODES="0 1 3 6 7 9" local HCCAPX_MESSAGE_PAIRS="0 1 2 3 4 5" diff --git a/src/Makefile b/src/Makefile index e4832860c..943bf2405 100644 --- a/src/Makefile +++ b/src/Makefile @@ -5,8 +5,8 @@ SHARED ?= 0 DEBUG := 0 -PRODUCTION := 0 -PRODUCTION_VERSION := v6.2.2 +PRODUCTION := 1 +PRODUCTION_VERSION := v6.2.3 ENABLE_CUBIN ?= 1 ENABLE_BRAIN ?= 1 ENABLE_UNRAR ?= 1 diff --git a/tools/package_bin.sh b/tools/package_bin.sh index e2f3a7659..38e2c82f4 100755 --- a/tools/package_bin.sh +++ b/tools/package_bin.sh @@ -6,7 +6,7 @@ ## export IN=$HOME/hashcat -export OUT=$HOME/xy/hashcat-6.2.2 +export OUT=$HOME/xy/hashcat-6.2.3 rm -rf $OUT rm -rf $OUT.7z From 261e0f42ab033101b229ae863f3ee72882830d6f Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Fri, 16 Jul 2021 23:03:11 +0200 Subject: [PATCH 22/22] Unset PRODUCTION option --- docs/changes.txt | 2 ++ src/Makefile | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/changes.txt b/docs/changes.txt 
index 2de32a006..b5df97f59 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -1,3 +1,5 @@ +* changes v6.2.3 -> v6.2.x + * changes v6.2.2 -> v6.2.3 ## diff --git a/src/Makefile b/src/Makefile index 943bf2405..e1ff6a534 100644 --- a/src/Makefile +++ b/src/Makefile @@ -5,7 +5,7 @@ SHARED ?= 0 DEBUG := 0 -PRODUCTION := 1 +PRODUCTION := 0 PRODUCTION_VERSION := v6.2.3 ENABLE_CUBIN ?= 1 ENABLE_BRAIN ?= 1