diff --git a/OpenCL/inc_ecc_secp256k1.cl b/OpenCL/inc_ecc_secp256k1.cl index b3a70df78..a487152ec 100644 --- a/OpenCL/inc_ecc_secp256k1.cl +++ b/OpenCL/inc_ecc_secp256k1.cl @@ -124,7 +124,9 @@ DECLSPEC u32 sub (u32 *r, const u32 *a, const u32 *b) : "r"(a[0]), "r"(a[1]), "r"(a[2]), "r"(a[3]), "r"(a[4]), "r"(a[5]), "r"(a[6]), "r"(a[7]), "r"(b[0]), "r"(b[1]), "r"(b[2]), "r"(b[3]), "r"(b[4]), "r"(b[5]), "r"(b[6]), "r"(b[7]) ); - #elif (defined IS_AMD || defined IS_HIP) && HAS_VSUB == 1 && HAS_VSUBB == 1 + // HIP doesn't support these so we stick to OpenCL (aka IS_AMD) - is also faster without asm + //#elif (defined IS_AMD || defined IS_HIP) && HAS_VSUB == 1 && HAS_VSUBB == 1 + #elif 0 __asm__ __volatile__ ( "V_SUB_U32 %0, %9, %17;" @@ -176,7 +178,9 @@ DECLSPEC u32 add (u32 *r, const u32 *a, const u32 *b) : "r"(a[0]), "r"(a[1]), "r"(a[2]), "r"(a[3]), "r"(a[4]), "r"(a[5]), "r"(a[6]), "r"(a[7]), "r"(b[0]), "r"(b[1]), "r"(b[2]), "r"(b[3]), "r"(b[4]), "r"(b[5]), "r"(b[6]), "r"(b[7]) ); - #elif (defined IS_AMD || defined IS_HIP) && HAS_VADD == 1 && HAS_VADDC == 1 + // HIP doesn't support these so we stick to OpenCL (aka IS_AMD) - is also faster without asm + //#elif (defined IS_AMD || defined IS_HIP) && HAS_VADD == 1 && HAS_VADDC == 1 + #elif 0 __asm__ __volatile__ ( "V_ADD_U32 %0, %9, %17;" diff --git a/OpenCL/inc_platform.cl b/OpenCL/inc_platform.cl index df0e210a4..8ccb034aa 100644 --- a/OpenCL/inc_platform.cl +++ b/OpenCL/inc_platform.cl @@ -193,46 +193,35 @@ DECLSPEC u32 hc_atomic_dec (GLOBAL_AS u32 *p) { volatile const u32 val = 1; - return __atomic_fetch_sub (p, val, __ATOMIC_RELAXED); + return atomicSub (p, val); } DECLSPEC u32 hc_atomic_inc (GLOBAL_AS u32 *p) { volatile const u32 val = 1; - return __atomic_fetch_add (p, val, __ATOMIC_RELAXED); + return atomicAdd (p, val); } DECLSPEC u32 hc_atomic_or (GLOBAL_AS u32 *p, volatile const u32 val) { - return __atomic_fetch_or (p, val, __ATOMIC_RELAXED); + return atomicOr (p, val); } -extern "C" __device__ 
__attribute__((pure)) double __ocml_log2_f64(double); - -DECLSPEC double log2 (double x) +DECLSPEC size_t get_global_id (const u32 dimindx __attribute__((unused))) { - return __ocml_log2_f64 (x); + return (blockIdx.x * blockDim.x) + threadIdx.x; } -extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_id(uint); -extern "C" __device__ __attribute__((const)) size_t __ockl_get_group_id(uint); -extern "C" __device__ __attribute__((const)) size_t __ockl_get_local_size(uint); -extern "C" __device__ __attribute__((const)) size_t __ockl_get_num_groups(uint); - -DECLSPEC size_t get_global_id (const u32 dimindx) +DECLSPEC size_t get_local_id (const u32 dimindx __attribute__((unused))) { - return (__ockl_get_group_id (dimindx) * __ockl_get_local_size (dimindx)) + __ockl_get_local_id (dimindx); + return threadIdx.x; } -DECLSPEC size_t get_local_id (const u32 dimindx) +DECLSPEC size_t get_local_size (const u32 dimindx __attribute__((unused))) { - return __ockl_get_local_id (dimindx); -} - -DECLSPEC size_t get_local_size (const u32 dimindx) -{ - return __ockl_get_local_size (dimindx); + // verify + return blockDim.x; } DECLSPEC u32x rotl32 (const u32x a, const int n) @@ -308,11 +297,8 @@ DECLSPEC u64 rotr64_S (const u64 a, const int n) return out.v64; } -extern "C" __device__ int printf(const char *fmt, ...); -//int printf(__constant const char* st, ...) 
__attribute__((format(printf, 1, 2))); - -#define FIXED_THREAD_COUNT(n) __attribute__((amdgpu_flat_work_group_size (1, (n)))) -#define SYNC_THREADS() __builtin_amdgcn_s_barrier () +#define FIXED_THREAD_COUNT(n) __launch_bounds__((n), 0) +#define SYNC_THREADS() __syncthreads () #endif #ifdef IS_OPENCL diff --git a/OpenCL/inc_types.h b/OpenCL/inc_types.h index 0c715da66..9611ec05d 100644 --- a/OpenCL/inc_types.h +++ b/OpenCL/inc_types.h @@ -21,96 +21,23 @@ typedef unsigned char uchar; typedef unsigned short ushort; typedef unsigned int uint; -typedef unsigned long long ulong; +typedef unsigned long ulong; +typedef unsigned long long ullong; #endif -#ifdef IS_HIP -// https://github.com/llvm-mirror/clang/blob/master/lib/Headers/opencl-c-base.h - -// built-in scalar data types: - -/** - * An unsigned 8-bit integer. - */ -typedef unsigned char uchar; - -/** - * An unsigned 16-bit integer. - */ -typedef unsigned short ushort; - -/** - * An unsigned 32-bit integer. - */ -typedef unsigned int uint; - -/** - * An unsigned 64-bit integer. - */ -typedef unsigned long ulong; - -/** - * The unsigned integer type of the result of the sizeof operator. This - * is a 32-bit unsigned integer if CL_DEVICE_ADDRESS_BITS - * defined in table 4.3 is 32-bits and is a 64-bit unsigned integer if - * CL_DEVICE_ADDRESS_BITS is 64-bits. 
- */ -typedef __SIZE_TYPE__ size_t; - -// built-in vector data types: -typedef char char2 __attribute__((ext_vector_type(2))); -typedef char char3 __attribute__((ext_vector_type(3))); -typedef char char4 __attribute__((ext_vector_type(4))); -typedef char char8 __attribute__((ext_vector_type(8))); -typedef char char16 __attribute__((ext_vector_type(16))); -typedef uchar uchar2 __attribute__((ext_vector_type(2))); -typedef uchar uchar3 __attribute__((ext_vector_type(3))); -typedef uchar uchar4 __attribute__((ext_vector_type(4))); -typedef uchar uchar8 __attribute__((ext_vector_type(8))); -typedef uchar uchar16 __attribute__((ext_vector_type(16))); -typedef short short2 __attribute__((ext_vector_type(2))); -typedef short short3 __attribute__((ext_vector_type(3))); -typedef short short4 __attribute__((ext_vector_type(4))); -typedef short short8 __attribute__((ext_vector_type(8))); -typedef short short16 __attribute__((ext_vector_type(16))); -typedef ushort ushort2 __attribute__((ext_vector_type(2))); -typedef ushort ushort3 __attribute__((ext_vector_type(3))); -typedef ushort ushort4 __attribute__((ext_vector_type(4))); -typedef ushort ushort8 __attribute__((ext_vector_type(8))); -typedef ushort ushort16 __attribute__((ext_vector_type(16))); -typedef int int2 __attribute__((ext_vector_type(2))); -typedef int int3 __attribute__((ext_vector_type(3))); -typedef int int4 __attribute__((ext_vector_type(4))); -typedef int int8 __attribute__((ext_vector_type(8))); -typedef int int16 __attribute__((ext_vector_type(16))); -typedef uint uint2 __attribute__((ext_vector_type(2))); -typedef uint uint3 __attribute__((ext_vector_type(3))); -typedef uint uint4 __attribute__((ext_vector_type(4))); -typedef uint uint8 __attribute__((ext_vector_type(8))); -typedef uint uint16 __attribute__((ext_vector_type(16))); -typedef long long2 __attribute__((ext_vector_type(2))); -typedef long long3 __attribute__((ext_vector_type(3))); -typedef long long4 __attribute__((ext_vector_type(4))); 
-typedef long long8 __attribute__((ext_vector_type(8))); -typedef long long16 __attribute__((ext_vector_type(16))); -typedef ulong ulong2 __attribute__((ext_vector_type(2))); -typedef ulong ulong3 __attribute__((ext_vector_type(3))); -typedef ulong ulong4 __attribute__((ext_vector_type(4))); -typedef ulong ulong8 __attribute__((ext_vector_type(8))); -typedef ulong ulong16 __attribute__((ext_vector_type(16))); -typedef float float2 __attribute__((ext_vector_type(2))); -typedef float float3 __attribute__((ext_vector_type(3))); -typedef float float4 __attribute__((ext_vector_type(4))); -typedef float float8 __attribute__((ext_vector_type(8))); -typedef float float16 __attribute__((ext_vector_type(16))); - +#ifdef IS_OPENCL +typedef ulong ullong; +typedef ulong2 ullong2; +typedef ulong4 ullong4; +typedef ulong8 ullong8; +typedef ulong16 ullong16; #endif #ifdef KERNEL_STATIC typedef uchar u8; typedef ushort u16; typedef uint u32; -typedef ulong u64; +typedef ullong u64; #else typedef uint8_t u8; typedef uint16_t u16; @@ -910,7 +837,7 @@ typedef __device_builtin__ struct u64x u64x; typedef VTYPE(uchar, VECT_SIZE) u8x; typedef VTYPE(ushort, VECT_SIZE) u16x; typedef VTYPE(uint, VECT_SIZE) u32x; -typedef VTYPE(ulong, VECT_SIZE) u64x; +typedef VTYPE(ullong, VECT_SIZE) u64x; #define make_u8x (u8x) #define make_u16x (u16x) diff --git a/OpenCL/inc_vendor.h b/OpenCL/inc_vendor.h index d44ac87c9..bbd7e23d8 100644 --- a/OpenCL/inc_vendor.h +++ b/OpenCL/inc_vendor.h @@ -32,10 +32,6 @@ #define LOCAL_AS #define KERNEL_FQ extern "C" __global__ #elif defined IS_HIP -#define __device__ __attribute__((device)) -#define __constant__ __attribute__((constant)) -#define __shared__ __attribute__((shared)) -#define __global__ __attribute__((global)) #define CONSTANT_VK __constant__ #define CONSTANT_AS #define GLOBAL_AS diff --git a/OpenCL/inc_zip_inflate.cl b/OpenCL/inc_zip_inflate.cl index d05f6a792..00f762d81 100644 --- a/OpenCL/inc_zip_inflate.cl +++ b/OpenCL/inc_zip_inflate.cl @@ -73,18 
+73,16 @@ enum{ MZ_VERSION_ERROR = -6, MZ_PARAM_ERROR = -10000 }; -typedef unsigned long mz_ulong; +typedef ullong mz_ulong; #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES typedef unsigned char Byte; typedef unsigned int uInt; -typedef mz_ulong uLong; typedef Byte Bytef; typedef uInt uIntf; typedef char charf; typedef int intf; typedef void *voidpf; -typedef uLong uLongf; typedef void *voidp; typedef void *const voidpc; #define Z_NULL 0 @@ -204,10 +202,6 @@ DECLSPEC void *memset(u8 *s, int c, u32 len){ #define MZ_MIN(a, b) (((a) < (b)) ? (a) : (b)) #define MZ_DEFAULT_WINDOW_BITS 15 #define TINFL_LZ_DICT_SIZE 32768 -#define TINFL_MEMCPY(d, s, l) memcpy(d, s, l) -#define TINFL_MEMCPY_G(d, s, l, p) memcpy_g(d, s, l, p) -#define TINFL_MEMSET(p, c, l) memset(p, c, (u32)l) -#define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj)) // hashcat-patched/hashcat-specific: #ifdef CRC32_IN_INFLATE @@ -583,7 +577,7 @@ DECLSPEC tinfl_status tinfl_decompress(tinfl_decompressor *r, MAYBE_GLOBAL const TINFL_CR_RETURN(38, (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) ? 
TINFL_STATUS_NEEDS_MORE_INPUT : TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS); } n = MZ_MIN(MZ_MIN((size_t)(pOut_buf_end - pOut_buf_cur), (size_t)(pIn_buf_end - pIn_buf_cur)), counter); - TINFL_MEMCPY_G(pOut_buf_cur, pIn_buf_cur, n, pStream); + memcpy_g(pOut_buf_cur, pIn_buf_cur, n, pStream); pIn_buf_cur += n; pOut_buf_cur += n; counter -= (mz_uint)n; @@ -601,7 +595,7 @@ DECLSPEC tinfl_status tinfl_decompress(tinfl_decompressor *r, MAYBE_GLOBAL const mz_uint i; r->m_table_sizes[0] = 288; r->m_table_sizes[1] = 32; - TINFL_MEMSET(r->m_tables[1].m_code_size, 5, 32); + memset(r->m_tables[1].m_code_size, 5, 32); for (i = 0; i <= 143; ++i) *p++ = 8; for (; i <= 255; ++i) @@ -618,7 +612,8 @@ DECLSPEC tinfl_status tinfl_decompress(tinfl_decompressor *r, MAYBE_GLOBAL const TINFL_GET_BITS(11, r->m_table_sizes[counter], "\05\05\04"[counter]); r->m_table_sizes[counter] += s_min_table_sizes[counter]; } - MZ_CLEAR_OBJ(r->m_tables[2].m_code_size); + memset(r->m_tables[2].m_code_size, 0, TINFL_MAX_HUFF_SYMBOLS_0); + for (counter = 0; counter < r->m_table_sizes[2]; counter++) { mz_uint s; @@ -633,9 +628,11 @@ DECLSPEC tinfl_status tinfl_decompress(tinfl_decompressor *r, MAYBE_GLOBAL const tinfl_huff_table *pTable; mz_uint i, j, used_syms, total, sym_index, next_code[17], total_syms[16]; pTable = &r->m_tables[r->m_type]; - MZ_CLEAR_OBJ(total_syms); - MZ_CLEAR_OBJ(pTable->m_look_up); - MZ_CLEAR_OBJ(pTable->m_tree); + + memset((u8 *) total_syms, 0, 64); + memset((u8 *) pTable->m_look_up, 0, TINFL_FAST_LOOKUP_SIZE * 2); + memset((u8 *) pTable->m_tree, 0, TINFL_MAX_HUFF_SYMBOLS_0 * 2 * 2); + for (i = 0; i < r->m_table_sizes[r->m_type]; ++i) total_syms[pTable->m_code_size[i]]++; used_syms = 0, total = 0; @@ -707,15 +704,18 @@ DECLSPEC tinfl_status tinfl_decompress(tinfl_decompressor *r, MAYBE_GLOBAL const num_extra = "\02\03\07"[dist - 16]; TINFL_GET_BITS(18, s, num_extra); s += "\03\03\013"[dist - 16]; - TINFL_MEMSET(r->m_len_codes + counter, (dist == 16) ? 
r->m_len_codes[counter - 1] : 0, s); + + memset(r->m_len_codes + counter, (dist == 16) ? r->m_len_codes[counter - 1] : 0, s); + + counter += s; } if ((r->m_table_sizes[0] + r->m_table_sizes[1]) != counter) { TINFL_CR_RETURN_FOREVER(21, TINFL_STATUS_FAILED); } - TINFL_MEMCPY(r->m_tables[0].m_code_size, r->m_len_codes, r->m_table_sizes[0]); - TINFL_MEMCPY(r->m_tables[1].m_code_size, r->m_len_codes + r->m_table_sizes[0], r->m_table_sizes[1]); + memcpy(r->m_tables[0].m_code_size, r->m_len_codes, r->m_table_sizes[0]); + memcpy(r->m_tables[1].m_code_size, r->m_len_codes + r->m_table_sizes[0], r->m_table_sizes[1]); } } for (;;) diff --git a/OpenCL/m00500-optimized.cl b/OpenCL/m00500-optimized.cl index 19f7153ff..38a361b96 100644 --- a/OpenCL/m00500-optimized.cl +++ b/OpenCL/m00500-optimized.cl @@ -32,7 +32,7 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons u32 tmp3; u32 tmp4; - #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; u32 in2 = append[2]; @@ -45,12 +45,18 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons tmp4 = hc_bytealign (in3, 0, offset); #endif - #ifdef IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; const int offset_minus_4 = 4 - offset_mod_4; + #if defined IS_NV const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; + #endif + + #if (defined IS_AMD || defined IS_HIP) + const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); + #endif u32 in0 = append[0]; u32 in1 = append[1]; @@ -139,7 +145,7 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, u32 tmp3; u32 tmp4; - #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC u32 in0 = 
append[0]; u32 in1 = append[1]; u32 in2 = append[2]; @@ -153,12 +159,18 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, tmp4 = hc_bytealign (in3, in4, offset); #endif - #ifdef IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; const int offset_minus_4 = 4 - offset_mod_4; + #if defined IS_NV const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; + #endif + + #if (defined IS_AMD || defined IS_HIP) + const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); + #endif u32 in0 = append[0]; u32 in1 = append[1]; @@ -246,7 +258,7 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const u32 tmp1; u32 tmp2; - #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; @@ -255,12 +267,18 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const tmp2 = hc_bytealign (in1, 0, offset); #endif - #ifdef IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; const int offset_minus_4 = 4 - offset_mod_4; + #if defined IS_NV const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; + #endif + + #if (defined IS_AMD || defined IS_HIP) + const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); + #endif u32 in0 = append[0]; u32 in1 = append[1]; diff --git a/OpenCL/m01500_a3-pure.cl b/OpenCL/m01500_a3-pure.cl index c2c4245e1..7a5adf017 100644 --- a/OpenCL/m01500_a3-pure.cl +++ b/OpenCL/m01500_a3-pure.cl @@ -1664,18 +1664,18 @@ DECLSPEC void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 DECLSPEC void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 
K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 *D00, u32 *D01, u32 *D02, u32 *D03, u32 *D04, u32 *D05, u32 *D06, u32 *D07, u32 *D08, u32 *D09, u32 *D10, u32 *D11, u32 *D12, u32 *D13, u32 *D14, u32 *D15, u32 *D16, u32 *D17, u32 *D18, u32 *D19, u32 *D20, u32 *D21, u32 *D22, u32 *D23, u32 *D24, u32 *D25, u32 *D26, u32 *D27, u32 *D28, u32 *D29, u32 *D30, u32 *D31, u32 *D32, u32 *D33, u32 *D34, u32 *D35, u32 *D36, u32 *D37, u32 *D38, u32 *D39, u32 *D40, u32 *D41, u32 *D42, u32 *D43, u32 *D44, u32 *D45, u32 *D46, u32 *D47, u32 *D48, u32 *D49, u32 *D50, u32 *D51, u32 *D52, u32 *D53, u32 *D54, u32 *D55, u32 *D56, u32 *D57, u32 *D58, u32 *D59, u32 *D60, u32 *D61, u32 *D62, u32 *D63) { - const u32 s001 = (0x001 & SALT) ? 0xffffffff : 0; - const u32 s002 = (0x002 & SALT) ? 0xffffffff : 0; - const u32 s004 = (0x004 & SALT) ? 0xffffffff : 0; - const u32 s008 = (0x008 & SALT) ? 0xffffffff : 0; - const u32 s010 = (0x010 & SALT) ? 0xffffffff : 0; - const u32 s020 = (0x020 & SALT) ? 0xffffffff : 0; - const u32 s040 = (0x040 & SALT) ? 0xffffffff : 0; - const u32 s080 = (0x080 & SALT) ? 0xffffffff : 0; - const u32 s100 = (0x100 & SALT) ? 0xffffffff : 0; - const u32 s200 = (0x200 & SALT) ? 0xffffffff : 0; - const u32 s400 = (0x400 & SALT) ? 0xffffffff : 0; - const u32 s800 = (0x800 & SALT) ? 
0xffffffff : 0; + const u32 s001 = (0x001 & SALT) ? 1 : 0; + const u32 s002 = (0x002 & SALT) ? 1 : 0; + const u32 s004 = (0x004 & SALT) ? 1 : 0; + const u32 s008 = (0x008 & SALT) ? 1 : 0; + const u32 s010 = (0x010 & SALT) ? 1 : 0; + const u32 s020 = (0x020 & SALT) ? 1 : 0; + const u32 s040 = (0x040 & SALT) ? 1 : 0; + const u32 s080 = (0x080 & SALT) ? 1 : 0; + const u32 s100 = (0x100 & SALT) ? 1 : 0; + const u32 s200 = (0x200 & SALT) ? 1 : 0; + const u32 s400 = (0x400 & SALT) ? 1 : 0; + const u32 s800 = (0x800 & SALT) ? 1 : 0; KXX_DECL u32 k00, k01, k02, k03, k04, k05; KXX_DECL u32 k06, k07, k08, k09, k10, k11; diff --git a/OpenCL/m01600-optimized.cl b/OpenCL/m01600-optimized.cl index cfaad44cc..62194e973 100644 --- a/OpenCL/m01600-optimized.cl +++ b/OpenCL/m01600-optimized.cl @@ -31,7 +31,7 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons u32 tmp3; u32 tmp4; - #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; u32 in2 = append[2]; @@ -44,12 +44,18 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons tmp4 = hc_bytealign (in3, 0, offset); #endif - #ifdef IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; const int offset_minus_4 = 4 - offset_mod_4; + #if defined IS_NV const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; + #endif + + #if (defined IS_AMD || defined IS_HIP) + const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); + #endif u32 in0 = append[0]; u32 in1 = append[1]; @@ -138,7 +144,7 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, u32 tmp3; u32 tmp4; - #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC u32 in0 = append[0]; 
u32 in1 = append[1]; u32 in2 = append[2]; @@ -152,12 +158,18 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, tmp4 = hc_bytealign (in3, in4, offset); #endif - #ifdef IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; const int offset_minus_4 = 4 - offset_mod_4; + #if defined IS_NV const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; + #endif + + #if (defined IS_AMD || defined IS_HIP) + const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); + #endif u32 in0 = append[0]; u32 in1 = append[1]; @@ -245,7 +257,7 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const u32 tmp1; u32 tmp2; - #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; @@ -254,12 +266,18 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const tmp2 = hc_bytealign (in1, 0, offset); #endif - #ifdef IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; const int offset_minus_4 = 4 - offset_mod_4; + #if defined IS_NV const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; + #endif + + #if (defined IS_AMD || defined IS_HIP) + const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); + #endif u32 in0 = append[0]; u32 in1 = append[1]; diff --git a/OpenCL/m05800-optimized.cl b/OpenCL/m05800-optimized.cl index 38099159f..9f8f5a3cc 100644 --- a/OpenCL/m05800-optimized.cl +++ b/OpenCL/m05800-optimized.cl @@ -2119,7 +2119,7 @@ DECLSPEC void append_salt (u32 *w0, u32 *w1, u32 *w2, const u32 *append, const u u32 tmp4; u32 tmp5; - #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC u32 in0 = 
append[0]; u32 in1 = append[1]; u32 in2 = append[2]; @@ -2134,12 +2134,18 @@ DECLSPEC void append_salt (u32 *w0, u32 *w1, u32 *w2, const u32 *append, const u tmp5 = hc_bytealign (in4, 0, offset); #endif - #ifdef IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; const int offset_minus_4 = 4 - offset_mod_4; + #if defined IS_NV const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; + #endif + + #if (defined IS_AMD || defined IS_HIP) + const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); + #endif u32 in0 = append[0]; u32 in1 = append[1]; diff --git a/OpenCL/m06300-optimized.cl b/OpenCL/m06300-optimized.cl index b7c9ddddd..f242259da 100644 --- a/OpenCL/m06300-optimized.cl +++ b/OpenCL/m06300-optimized.cl @@ -28,7 +28,7 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons u32 tmp3; u32 tmp4; - #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; u32 in2 = append[2]; @@ -41,12 +41,18 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons tmp4 = hc_bytealign (in3, 0, offset); #endif - #ifdef IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; const int offset_minus_4 = 4 - offset_mod_4; + #if defined IS_NV const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; + #endif + + #if (defined IS_AMD || defined IS_HIP) + const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); + #endif u32 in0 = append[0]; u32 in1 = append[1]; @@ -135,7 +141,7 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, u32 tmp3; u32 tmp4; - #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || 
defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; u32 in2 = append[2]; @@ -149,12 +155,18 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, tmp4 = hc_bytealign (in3, in4, offset); #endif - #ifdef IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; const int offset_minus_4 = 4 - offset_mod_4; + #if defined IS_NV const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; + #endif + + #if (defined IS_AMD || defined IS_HIP) + const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); + #endif u32 in0 = append[0]; u32 in1 = append[1]; @@ -242,7 +254,7 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const u32 tmp1; u32 tmp2; - #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; @@ -251,12 +263,18 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const tmp2 = hc_bytealign (in1, 0, offset); #endif - #ifdef IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; const int offset_minus_4 = 4 - offset_mod_4; + #if defined IS_NV const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; + #endif + + #if (defined IS_AMD || defined IS_HIP) + const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); + #endif u32 in0 = append[0]; u32 in1 = append[1]; diff --git a/OpenCL/m07400-optimized.cl b/OpenCL/m07400-optimized.cl index 7efa5c94e..5fb83a2ad 100644 --- a/OpenCL/m07400-optimized.cl +++ b/OpenCL/m07400-optimized.cl @@ -45,7 +45,7 @@ DECLSPEC u32 memcat16 (u32 *block, const u32 offset, const u32 *append, const u3 u32 in2 = append[2]; u32 in3 = append[3]; - #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC + #if ((defined IS_AMD || defined 
IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC const u32 tmp0 = hc_bytealign_be ( 0, in0, offset); const u32 tmp1 = hc_bytealign_be (in0, in1, offset); const u32 tmp2 = hc_bytealign_be (in1, in2, offset); @@ -53,8 +53,15 @@ DECLSPEC u32 memcat16 (u32 *block, const u32 offset, const u32 *append, const u3 const u32 tmp4 = hc_bytealign_be (in3, 0, offset); #endif - #ifdef IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV + + #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; + #endif + + #if (defined IS_AMD || defined IS_HIP) + const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); + #endif const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); @@ -165,7 +172,7 @@ DECLSPEC u32 memcat16c (u32 *block, const u32 offset, const u32 *append, const u u32 in2 = append[2]; u32 in3 = append[3]; - #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC const u32 tmp0 = hc_bytealign_be ( 0, in0, offset); const u32 tmp1 = hc_bytealign_be (in0, in1, offset); const u32 tmp2 = hc_bytealign_be (in1, in2, offset); @@ -173,8 +180,15 @@ DECLSPEC u32 memcat16c (u32 *block, const u32 offset, const u32 *append, const u const u32 tmp4 = hc_bytealign_be (in3, 0, offset); #endif - #ifdef IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV + + #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; + #endif + + #if (defined IS_AMD || defined IS_HIP) + const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); + #endif const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); @@ -322,7 +336,7 @@ DECLSPEC u32 memcat16s (u32 *block, const u32 offset, const u32 *append, const u u32 in3 = append[3]; u32 in4 = append[4]; - #if (defined IS_AMD 
|| defined IS_HIP) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC const u32 tmp0 = hc_bytealign_be ( 0, in0, offset); const u32 tmp1 = hc_bytealign_be (in0, in1, offset); const u32 tmp2 = hc_bytealign_be (in1, in2, offset); @@ -331,8 +345,15 @@ DECLSPEC u32 memcat16s (u32 *block, const u32 offset, const u32 *append, const u const u32 tmp5 = hc_bytealign_be (in4, 0, offset); #endif - #ifdef IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV + + #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; + #endif + + #if (defined IS_AMD || defined IS_HIP) + const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); + #endif const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); @@ -456,7 +477,7 @@ DECLSPEC u32 memcat16sc (u32 *block, const u32 offset, const u32 *append, const u32 in3 = append[3]; u32 in4 = append[4]; - #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC const u32 tmp0 = hc_bytealign_be ( 0, in0, offset); const u32 tmp1 = hc_bytealign_be (in0, in1, offset); const u32 tmp2 = hc_bytealign_be (in1, in2, offset); @@ -465,8 +486,15 @@ DECLSPEC u32 memcat16sc (u32 *block, const u32 offset, const u32 *append, const const u32 tmp5 = hc_bytealign_be (in4, 0, offset); #endif - #ifdef IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV + + #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; + #endif + + #if (defined IS_AMD || defined IS_HIP) + const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); + #endif const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); @@ -756,7 +784,7 @@ DECLSPEC u32 memcat20 (u32 *block, const u32 offset, const u32 
*append, const u3 u32 in2 = append[2]; u32 in3 = append[3]; - #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset); const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset); const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset); @@ -764,8 +792,15 @@ DECLSPEC u32 memcat20 (u32 *block, const u32 offset, const u32 *append, const u3 const u32 tmp4 = hc_bytealign_be_S (in3, 0, offset); #endif - #ifdef IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV + + #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; + #endif + + #if (defined IS_AMD || defined IS_HIP) + const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); + #endif const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); @@ -915,7 +950,7 @@ DECLSPEC u32 memcat20_x80 (u32 *block, const u32 offset, const u32 *append, cons u32 in3 = append[3]; u32 in4 = 0x80000000; - #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset); const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset); const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset); @@ -923,8 +958,15 @@ DECLSPEC u32 memcat20_x80 (u32 *block, const u32 offset, const u32 *append, cons const u32 tmp4 = hc_bytealign_be_S (in3, in4, offset); #endif - #ifdef IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV + + #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; + #endif + + #if (defined IS_AMD || defined IS_HIP) + const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); + #endif const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); const u32 tmp1 = hc_byte_perm_S (in1, 
in0, selector); @@ -1074,7 +1116,7 @@ DECLSPEC u32 memcat24 (u32 *block, const u32 offset, const u32 *append, const u3 u32 in3 = append[3]; u32 in4 = append[4]; - #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset); const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset); const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset); @@ -1083,8 +1125,15 @@ DECLSPEC u32 memcat24 (u32 *block, const u32 offset, const u32 *append, const u3 const u32 tmp5 = hc_bytealign_be_S (in4, 0, offset); #endif - #ifdef IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV + + #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; + #endif + + #if (defined IS_AMD || defined IS_HIP) + const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); + #endif const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); diff --git a/OpenCL/m08900-pure.cl b/OpenCL/m08900-pure.cl index 2bd1de39a..74f8a0e66 100644 --- a/OpenCL/m08900-pure.cl +++ b/OpenCL/m08900-pure.cl @@ -24,7 +24,7 @@ typedef struct } scrypt_tmp_t; -#if defined IS_CUDA +#if defined IS_CUDA || defined IS_HIP inline __device__ uint4 operator & (const uint4 a, const u32 b) { return make_uint4 ((a.x & b ), (a.y & b ), (a.z & b ), (a.w & b )); } inline __device__ uint4 operator << (const uint4 a, const u32 b) { return make_uint4 ((a.x << b ), (a.y << b ), (a.z << b ), (a.w << b )); } @@ -41,15 +41,6 @@ inline __device__ uint4 rotate (const uint4 a, const int n) #endif -#if defined IS_HIP - -inline __device__ uint4 rotate (const uint4 a, const int n) -{ - return ((a << n) | ((a >> (32 - n)))); -} - -#endif - DECLSPEC uint4 hc_swap32_4 (uint4 v) { return (rotate ((v & 0x00FF00FF), 24u) | rotate ((v & 0xFF00FF00), 8u)); @@ -66,7 +57,7 @@ DECLSPEC uint4 hc_swap32_4 (uint4 v) 
#define ADD_ROTATE_XOR(r,i1,i2,s) (r) ^= rotate ((i1) + (i2), (s)); -#if defined IS_CUDA +#if defined IS_CUDA || defined IS_HIP #define SALSA20_2R() \ { \ diff --git a/OpenCL/m09000-pure.cl b/OpenCL/m09000-pure.cl index 737adde4e..323cf8387 100644 --- a/OpenCL/m09000-pure.cl +++ b/OpenCL/m09000-pure.cl @@ -310,6 +310,51 @@ CONSTANT_VK u32a c_pbox[18] = 0x9216d5d9, 0x8979fb1b }; +// Yes, works only with CUDA atm + +#ifdef DYNAMIC_LOCAL +#define BCRYPT_AVOID_BANK_CONFLICTS +#endif + +#ifdef BCRYPT_AVOID_BANK_CONFLICTS + +// access pattern: minimize bank ID based on thread ID but thread ID is not saved from computation + +#define KEY32(lid,key) (((key) * FIXED_LOCAL_SIZE) + (lid)) + +DECLSPEC u32 GET_KEY32 (LOCAL_AS u32 *S, const u64 key) +{ + const u64 lid = get_local_id (0); + + return S[KEY32 (lid, key)]; +} + +DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const u64 key, const u32 val) +{ + const u64 lid = get_local_id (0); + + S[KEY32 (lid, key)] = val; +} + +#undef KEY32 + +#else + +// access pattern: linear access with S offset already set to right offset based on thread ID saving it from compuation +// makes sense if there are not thread ID's (for instance on CPU) + +DECLSPEC inline u32 GET_KEY32 (LOCAL_AS u32 *S, const u64 key) +{ + return S[key]; +} + +DECLSPEC inline void SET_KEY32 (LOCAL_AS u32 *S, const u64 key, const u32 val) +{ + S[key] = val; +} + +#endif + #define BF_ROUND(L,R,N) \ { \ u32 tmp; \ @@ -319,10 +364,10 @@ CONSTANT_VK u32a c_pbox[18] = const u32 r2 = unpack_v8b_from_v32_S ((L)); \ const u32 r3 = unpack_v8a_from_v32_S ((L)); \ \ - tmp = S0[r0]; \ - tmp += S1[r1]; \ - tmp ^= S2[r2]; \ - tmp += S3[r3]; \ + tmp = GET_KEY32 (S0, r0); \ + tmp += GET_KEY32 (S1, r1); \ + tmp ^= GET_KEY32 (S2, r2); \ + tmp += GET_KEY32 (S3, r3); \ \ (R) ^= tmp ^ P[(N)]; \ } @@ -357,6 +402,10 @@ CONSTANT_VK u32a c_pbox[18] = L ^= P[17]; \ } +#ifdef DYNAMIC_LOCAL +extern __shared__ u32 S[]; +#endif + KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m09000_init 
(KERN_ATTR_TMPS (pwsafe2_tmp_t)) { /** @@ -471,22 +520,33 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m09000_init (KERN_ATTR_TMPS P[i] = c_pbox[i]; } + #ifdef DYNAMIC_LOCAL + // from host + #else LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE][256]; LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE][256]; LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256]; LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256]; + #endif + #ifdef BCRYPT_AVOID_BANK_CONFLICTS + LOCAL_AS u32 *S0 = S + (FIXED_LOCAL_SIZE * 256 * 0); + LOCAL_AS u32 *S1 = S + (FIXED_LOCAL_SIZE * 256 * 1); + LOCAL_AS u32 *S2 = S + (FIXED_LOCAL_SIZE * 256 * 2); + LOCAL_AS u32 *S3 = S + (FIXED_LOCAL_SIZE * 256 * 3); + #else LOCAL_AS u32 *S0 = S0_all[lid]; LOCAL_AS u32 *S1 = S1_all[lid]; LOCAL_AS u32 *S2 = S2_all[lid]; LOCAL_AS u32 *S3 = S3_all[lid]; + #endif for (u32 i = 0; i < 256; i++) { - S0[i] = c_sbox0[i]; - S1[i] = c_sbox1[i]; - S2[i] = c_sbox2[i]; - S3[i] = c_sbox3[i]; + SET_KEY32 (S0, i, c_sbox0[i]); + SET_KEY32 (S1, i, c_sbox1[i]); + SET_KEY32 (S2, i, c_sbox2[i]); + SET_KEY32 (S3, i, c_sbox3[i]); } for (u32 i = 0; i < 18; i++) @@ -509,59 +569,59 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m09000_init (KERN_ATTR_TMPS { BF_ENCRYPT (L0, R0); - S0[i + 0] = L0; - S0[i + 1] = R0; + SET_KEY32 (S0, i + 0, L0); + SET_KEY32 (S0, i + 1, R0); BF_ENCRYPT (L0, R0); - S0[i + 2] = L0; - S0[i + 3] = R0; + SET_KEY32 (S0, i + 2, L0); + SET_KEY32 (S0, i + 3, R0); } for (u32 i = 0; i < 256; i += 4) { BF_ENCRYPT (L0, R0); - S1[i + 0] = L0; - S1[i + 1] = R0; + SET_KEY32 (S1, i + 0, L0); + SET_KEY32 (S1, i + 1, R0); BF_ENCRYPT (L0, R0); - S1[i + 2] = L0; - S1[i + 3] = R0; + SET_KEY32 (S1, i + 2, L0); + SET_KEY32 (S1, i + 3, R0); } for (u32 i = 0; i < 256; i += 4) { BF_ENCRYPT (L0, R0); - S2[i + 0] = L0; - S2[i + 1] = R0; + SET_KEY32 (S2, i + 0, L0); + SET_KEY32 (S2, i + 1, R0); BF_ENCRYPT (L0, R0); - S2[i + 2] = L0; - S2[i + 3] = R0; + SET_KEY32 (S2, i + 2, L0); + SET_KEY32 (S2, i + 3, R0); } for (u32 i = 0; i < 256; i += 4) { BF_ENCRYPT (L0, 
R0); - S3[i + 0] = L0; - S3[i + 1] = R0; + SET_KEY32 (S3, i + 0, L0); + SET_KEY32 (S3, i + 1, R0); BF_ENCRYPT (L0, R0); - S3[i + 2] = L0; - S3[i + 3] = R0; + SET_KEY32 (S3, i + 2, L0); + SET_KEY32 (S3, i + 3, R0); } - // store - tmps[gid].digest[0] = salt_buf[0]; tmps[gid].digest[1] = salt_buf[1]; + // store + for (u32 i = 0; i < 18; i++) { tmps[gid].P[i] = P[i]; @@ -569,10 +629,10 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m09000_init (KERN_ATTR_TMPS for (u32 i = 0; i < 256; i++) { - tmps[gid].S0[i] = S0[i]; - tmps[gid].S1[i] = S1[i]; - tmps[gid].S2[i] = S2[i]; - tmps[gid].S3[i] = S3[i]; + tmps[gid].S0[i] = GET_KEY32 (S0, i); + tmps[gid].S1[i] = GET_KEY32 (S1, i); + tmps[gid].S2[i] = GET_KEY32 (S2, i); + tmps[gid].S3[i] = GET_KEY32 (S3, i); } } @@ -602,22 +662,33 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m09000_loop (KERN_ATTR_TMPS P[i] = tmps[gid].P[i]; } + #ifdef DYNAMIC_LOCAL + // from host + #else LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE][256]; LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE][256]; LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256]; LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256]; + #endif + #ifdef BCRYPT_AVOID_BANK_CONFLICTS + LOCAL_AS u32 *S0 = S + (FIXED_LOCAL_SIZE * 256 * 0); + LOCAL_AS u32 *S1 = S + (FIXED_LOCAL_SIZE * 256 * 1); + LOCAL_AS u32 *S2 = S + (FIXED_LOCAL_SIZE * 256 * 2); + LOCAL_AS u32 *S3 = S + (FIXED_LOCAL_SIZE * 256 * 3); + #else LOCAL_AS u32 *S0 = S0_all[lid]; LOCAL_AS u32 *S1 = S1_all[lid]; LOCAL_AS u32 *S2 = S2_all[lid]; LOCAL_AS u32 *S3 = S3_all[lid]; + #endif for (u32 i = 0; i < 256; i++) { - S0[i] = tmps[gid].S0[i]; - S1[i] = tmps[gid].S1[i]; - S2[i] = tmps[gid].S2[i]; - S3[i] = tmps[gid].S3[i]; + SET_KEY32 (S0, i, tmps[gid].S0[i]); + SET_KEY32 (S1, i, tmps[gid].S1[i]); + SET_KEY32 (S2, i, tmps[gid].S2[i]); + SET_KEY32 (S3, i, tmps[gid].S3[i]); } // loop @@ -630,8 +701,6 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m09000_loop (KERN_ATTR_TMPS BF_ENCRYPT (L0, R0); } - // store - tmps[gid].digest[0] = L0; 
tmps[gid].digest[1] = R0; } diff --git a/OpenCL/m10700-optimized.cl b/OpenCL/m10700-optimized.cl index a9b50a6ac..9779c8fe6 100644 --- a/OpenCL/m10700-optimized.cl +++ b/OpenCL/m10700-optimized.cl @@ -232,7 +232,7 @@ DECLSPEC void make_sc (u32 *sc, const u32 *pw, const u32 pw_len, const u32 *bl, u32 i; - #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC for (i = 0; i < pd; i++) sc[idx++] = pw[i]; sc[idx++] = pw[i] | hc_bytealign_be (bl[0], 0, pm4); @@ -242,8 +242,15 @@ DECLSPEC void make_sc (u32 *sc, const u32 *pw, const u32 pw_len, const u32 *bl, sc[idx++] = hc_bytealign_be ( 0, sc[i - 1], pm4); #endif - #ifdef IS_NV - int selector = (0x76543210 >> (pm4 * 4)) & 0xffff; + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV + + #if defined IS_NV + const int selector = (0x76543210 >> ((pm4 & 3) * 4)) & 0xffff; + #endif + + #if (defined IS_AMD || defined IS_HIP) + const int selector = l32_from_64_S (0x0706050403020100UL >> ((pm4 & 3) * 8)); + #endif for (i = 0; i < pd; i++) sc[idx++] = pw[i]; sc[idx++] = pw[i] @@ -263,16 +270,22 @@ DECLSPEC void make_pt_with_offset (u32 *pt, const u32 offset, const u32 *sc, con const u32 om = m % 4; const u32 od = m / 4; - #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC pt[0] = hc_bytealign_be (sc[od + 1], sc[od + 0], om); pt[1] = hc_bytealign_be (sc[od + 2], sc[od + 1], om); pt[2] = hc_bytealign_be (sc[od + 3], sc[od + 2], om); pt[3] = hc_bytealign_be (sc[od + 4], sc[od + 3], om); #endif - #ifdef IS_NV - int selector = (0x76543210 >> (om * 4)) & 0xffff; + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV + #if defined IS_NV + const int selector = (0x76543210 >> ((om & 3) * 4)) & 0xffff; + #endif + + #if (defined IS_AMD || defined IS_HIP) + const int selector = l32_from_64_S (0x0706050403020100UL >> 
((om & 3) * 8)); + #endif pt[0] = hc_byte_perm (sc[od + 0], sc[od + 1], selector); pt[1] = hc_byte_perm (sc[od + 1], sc[od + 2], selector); pt[2] = hc_byte_perm (sc[od + 2], sc[od + 3], selector); diff --git a/OpenCL/m11600-pure.cl b/OpenCL/m11600-pure.cl index be42e185b..d321aee3a 100644 --- a/OpenCL/m11600-pure.cl +++ b/OpenCL/m11600-pure.cl @@ -42,13 +42,20 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co u32 tmp0; u32 tmp1; - #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC tmp0 = hc_bytealign_be (0, append, func_len); tmp1 = hc_bytealign_be (append, 0, func_len); #endif - #ifdef IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV + + #if defined IS_NV const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff; + #endif + + #if (defined IS_AMD || defined IS_HIP) + const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8)); + #endif tmp0 = hc_byte_perm (append, 0, selector); tmp1 = hc_byte_perm (0, append, selector); diff --git a/OpenCL/m12500-pure.cl b/OpenCL/m12500-pure.cl index f8ed47771..6112ec296 100644 --- a/OpenCL/m12500-pure.cl +++ b/OpenCL/m12500-pure.cl @@ -37,13 +37,20 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co u32 tmp0; u32 tmp1; - #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC tmp0 = hc_bytealign_be (0, append, func_len); tmp1 = hc_bytealign_be (append, 0, func_len); #endif - #ifdef IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV + + #if defined IS_NV const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff; + #endif + + #if (defined IS_AMD || defined IS_HIP) + const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8)); + #endif tmp0 = hc_byte_perm (append, 
0, selector); tmp1 = hc_byte_perm (0, append, selector); diff --git a/OpenCL/m13800_a0-optimized.cl b/OpenCL/m13800_a0-optimized.cl index 6758ffbd4..043ed0d13 100644 --- a/OpenCL/m13800_a0-optimized.cl +++ b/OpenCL/m13800_a0-optimized.cl @@ -51,7 +51,7 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry) u32x tmp15; u32x tmp16; - #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC tmp00 = hc_bytealign_be ( 0, carry[ 0], offset); tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset); tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset); @@ -71,8 +71,15 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry) tmp16 = hc_bytealign_be (carry[15], 0, offset); #endif - #ifdef IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV + + #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; + #endif + + #if (defined IS_AMD || defined IS_HIP) + const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); + #endif tmp00 = hc_byte_perm (carry[ 0], 0, selector); tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector); diff --git a/OpenCL/m13800_a1-optimized.cl b/OpenCL/m13800_a1-optimized.cl index 85e711b94..4227e48d5 100644 --- a/OpenCL/m13800_a1-optimized.cl +++ b/OpenCL/m13800_a1-optimized.cl @@ -49,7 +49,7 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry) u32x tmp15; u32x tmp16; - #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC tmp00 = hc_bytealign_be ( 0, carry[ 0], offset); tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset); tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset); @@ -69,8 +69,15 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry) tmp16 = hc_bytealign_be (carry[15], 0, offset); #endif - #ifdef IS_NV + 
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV + + #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; + #endif + + #if (defined IS_AMD || defined IS_HIP) + const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); + #endif tmp00 = hc_byte_perm (carry[ 0], 0, selector); tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector); diff --git a/OpenCL/m13800_a3-optimized.cl b/OpenCL/m13800_a3-optimized.cl index 65b759de0..895d4378c 100644 --- a/OpenCL/m13800_a3-optimized.cl +++ b/OpenCL/m13800_a3-optimized.cl @@ -48,7 +48,7 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry) u32x tmp15; u32x tmp16; - #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC tmp00 = hc_bytealign_be ( 0, carry[ 0], offset); tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset); tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset); @@ -68,8 +68,15 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry) tmp16 = hc_bytealign_be (carry[15], 0, offset); #endif - #ifdef IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV + + #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; + #endif + + #if (defined IS_AMD || defined IS_HIP) + const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); + #endif tmp00 = hc_byte_perm (carry[ 0], 0, selector); tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector); diff --git a/OpenCL/m15700-pure.cl b/OpenCL/m15700-pure.cl index 10a7aaa14..09819b085 100644 --- a/OpenCL/m15700-pure.cl +++ b/OpenCL/m15700-pure.cl @@ -31,7 +31,7 @@ typedef struct ethereum_scrypt } ethereum_scrypt_t; -#if defined IS_CUDA +#if defined IS_CUDA || defined IS_HIP inline __device__ uint4 operator & (const uint4 a, const u32 b) { return make_uint4 ((a.x & b ), (a.y & b ), (a.z & b ), (a.w & b )); } 
inline __device__ uint4 operator << (const uint4 a, const u32 b) { return make_uint4 ((a.x << b ), (a.y << b ), (a.z << b ), (a.w << b )); } @@ -48,15 +48,6 @@ inline __device__ uint4 rotate (const uint4 a, const int n) #endif -#if defined IS_HIP - -inline __device__ uint4 rotate (const uint4 a, const int n) -{ - return ((a << n) | ((a >> (32 - n)))); -} - -#endif - DECLSPEC uint4 hc_swap32_4 (uint4 v) { return (rotate ((v & 0x00FF00FF), 24u) | rotate ((v & 0xFF00FF00), 8u)); @@ -73,7 +64,7 @@ DECLSPEC uint4 hc_swap32_4 (uint4 v) #define ADD_ROTATE_XOR(r,i1,i2,s) (r) ^= rotate ((i1) + (i2), (s)); -#if defined IS_CUDA +#if defined IS_CUDA || defined IS_HIP #define SALSA20_2R() \ { \ diff --git a/OpenCL/m18600-pure.cl b/OpenCL/m18600-pure.cl index 061c61bd8..f98aca9c0 100644 --- a/OpenCL/m18600-pure.cl +++ b/OpenCL/m18600-pure.cl @@ -319,6 +319,51 @@ CONSTANT_VK u32a c_pbox[18] = 0x9216d5d9, 0x8979fb1b }; +// Yes, works only with CUDA atm + +#ifdef DYNAMIC_LOCAL +#define BCRYPT_AVOID_BANK_CONFLICTS +#endif + +#ifdef BCRYPT_AVOID_BANK_CONFLICTS + +// access pattern: minimize bank ID based on thread ID but thread ID is not saved from computation + +#define KEY32(lid,key) (((key) * FIXED_LOCAL_SIZE_COMP) + (lid)) + +DECLSPEC u32 GET_KEY32 (LOCAL_AS u32 *S, const u64 key) +{ + const u64 lid = get_local_id (0); + + return S[KEY32 (lid, key)]; +} + +DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const u64 key, const u32 val) +{ + const u64 lid = get_local_id (0); + + S[KEY32 (lid, key)] = val; +} + +#undef KEY32 + +#else + +// access pattern: linear access with S offset already set to right offset based on thread ID saving it from compuation +// makes sense if there are not thread ID's (for instance on CPU) + +DECLSPEC inline u32 GET_KEY32 (LOCAL_AS u32 *S, const u64 key) +{ + return S[key]; +} + +DECLSPEC inline void SET_KEY32 (LOCAL_AS u32 *S, const u64 key, const u32 val) +{ + S[key] = val; +} + +#endif + #define BF_ROUND(L,R,N) \ { \ u32 tmp; \ @@ -328,10 +373,10 @@ 
CONSTANT_VK u32a c_pbox[18] = const u32 r2 = unpack_v8b_from_v32_S ((L)); \ const u32 r3 = unpack_v8a_from_v32_S ((L)); \ \ - tmp = S0[r0]; \ - tmp += S1[r1]; \ - tmp ^= S2[r2]; \ - tmp += S3[r3]; \ + tmp = GET_KEY32 (S0, r0); \ + tmp += GET_KEY32 (S1, r1); \ + tmp ^= GET_KEY32 (S2, r2); \ + tmp += GET_KEY32 (S3, r3); \ \ (R) ^= tmp ^ P[(N)]; \ } @@ -366,6 +411,10 @@ CONSTANT_VK u32a c_pbox[18] = L ^= P[17]; \ } +#ifdef DYNAMIC_LOCAL +extern __shared__ u32 S[]; +#endif + DECLSPEC void hmac_sha1_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) { digest[0] = ipad[0]; @@ -586,7 +635,7 @@ KERNEL_FQ void m18600_loop (KERN_ATTR_TMPS_ESALT (odf11_tmp_t, odf11_t)) } } -KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m18600_comp (KERN_ATTR_TMPS_ESALT (odf11_tmp_t, odf11_t)) +KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE_COMP) m18600_comp (KERN_ATTR_TMPS_ESALT (odf11_tmp_t, odf11_t)) { const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); @@ -616,22 +665,33 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m18600_comp (KERN_ATTR_TMPS_ P[i] = c_pbox[i] ^ ukey[i % 4]; } - LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE][256]; - LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE][256]; - LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256]; - LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256]; + #ifdef DYNAMIC_LOCAL + // from host + #else + LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE_COMP][256]; + LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE_COMP][256]; + LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE_COMP][256]; + LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE_COMP][256]; + #endif + #ifdef BCRYPT_AVOID_BANK_CONFLICTS + LOCAL_AS u32 *S0 = S + (FIXED_LOCAL_SIZE_COMP * 256 * 0); + LOCAL_AS u32 *S1 = S + (FIXED_LOCAL_SIZE_COMP * 256 * 1); + LOCAL_AS u32 *S2 = S + (FIXED_LOCAL_SIZE_COMP * 256 * 2); + LOCAL_AS u32 *S3 = S + (FIXED_LOCAL_SIZE_COMP * 256 * 3); + #else LOCAL_AS u32 *S0 = S0_all[lid]; LOCAL_AS u32 *S1 = S1_all[lid]; LOCAL_AS u32 *S2 = S2_all[lid]; LOCAL_AS u32 *S3 = S3_all[lid]; + 
#endif for (u32 i = 0; i < 256; i++) { - S0[i] = c_sbox0[i]; - S1[i] = c_sbox1[i]; - S2[i] = c_sbox2[i]; - S3[i] = c_sbox3[i]; + SET_KEY32 (S0, i, c_sbox0[i]); + SET_KEY32 (S1, i, c_sbox1[i]); + SET_KEY32 (S2, i, c_sbox2[i]); + SET_KEY32 (S3, i, c_sbox3[i]); } u32 L0 = 0; @@ -649,52 +709,52 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m18600_comp (KERN_ATTR_TMPS_ { BF_ENCRYPT (L0, R0); - S0[i + 0] = L0; - S0[i + 1] = R0; + SET_KEY32 (S0, i + 0, L0); + SET_KEY32 (S0, i + 1, R0); BF_ENCRYPT (L0, R0); - S0[i + 2] = L0; - S0[i + 3] = R0; + SET_KEY32 (S0, i + 2, L0); + SET_KEY32 (S0, i + 3, R0); } for (u32 i = 0; i < 256; i += 4) { BF_ENCRYPT (L0, R0); - S1[i + 0] = L0; - S1[i + 1] = R0; + SET_KEY32 (S1, i + 0, L0); + SET_KEY32 (S1, i + 1, R0); BF_ENCRYPT (L0, R0); - S1[i + 2] = L0; - S1[i + 3] = R0; + SET_KEY32 (S1, i + 2, L0); + SET_KEY32 (S1, i + 3, R0); } for (u32 i = 0; i < 256; i += 4) { BF_ENCRYPT (L0, R0); - S2[i + 0] = L0; - S2[i + 1] = R0; + SET_KEY32 (S2, i + 0, L0); + SET_KEY32 (S2, i + 1, R0); BF_ENCRYPT (L0, R0); - S2[i + 2] = L0; - S2[i + 3] = R0; + SET_KEY32 (S2, i + 2, L0); + SET_KEY32 (S2, i + 3, R0); } for (u32 i = 0; i < 256; i += 4) { BF_ENCRYPT (L0, R0); - S3[i + 0] = L0; - S3[i + 1] = R0; + SET_KEY32 (S3, i + 0, L0); + SET_KEY32 (S3, i + 1, R0); BF_ENCRYPT (L0, R0); - S3[i + 2] = L0; - S3[i + 3] = R0; + SET_KEY32 (S3, i + 2, L0); + SET_KEY32 (S3, i + 3, R0); } GLOBAL_AS const odf11_t *es = &esalt_bufs[DIGESTS_OFFSET]; diff --git a/OpenCL/m22700-pure.cl b/OpenCL/m22700-pure.cl index 4ecc345ce..a28b458c2 100644 --- a/OpenCL/m22700-pure.cl +++ b/OpenCL/m22700-pure.cl @@ -72,7 +72,7 @@ DECLSPEC int is_valid_bitcoinj (const u32 *w) return 1; } -#if defined IS_CUDA +#if defined IS_CUDA || defined IS_HIP inline __device__ uint4 operator & (const uint4 a, const u32 b) { return make_uint4 ((a.x & b ), (a.y & b ), (a.z & b ), (a.w & b )); } inline __device__ uint4 operator << (const uint4 a, const u32 b) { return make_uint4 ((a.x << b ), (a.y << b 
), (a.z << b ), (a.w << b )); } @@ -89,15 +89,6 @@ inline __device__ uint4 rotate (const uint4 a, const int n) #endif -#if defined IS_HIP - -inline __device__ uint4 rotate (const uint4 a, const int n) -{ - return ((a << n) | ((a >> (32 - n)))); -} - -#endif - DECLSPEC uint4 hc_swap32_4 (uint4 v) { return (rotate ((v & 0x00FF00FF), 24u) | rotate ((v & 0xFF00FF00), 8u)); @@ -114,7 +105,7 @@ DECLSPEC uint4 hc_swap32_4 (uint4 v) #define ADD_ROTATE_XOR(r,i1,i2,s) (r) ^= rotate ((i1) + (i2), (s)); -#if defined IS_CUDA +#if defined IS_CUDA || defined IS_HIP #define SALSA20_2R() \ { \ diff --git a/OpenCL/m23700-pure.cl b/OpenCL/m23700-pure.cl index af287574e..63e84cbf7 100644 --- a/OpenCL/m23700-pure.cl +++ b/OpenCL/m23700-pure.cl @@ -145,13 +145,20 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co u32 tmp0; u32 tmp1; - #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC tmp0 = hc_bytealign_be (0, append, func_len); tmp1 = hc_bytealign_be (append, 0, func_len); #endif - #ifdef IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV + + #if defined IS_NV const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff; + #endif + + #if (defined IS_AMD || defined IS_HIP) + const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8)); + #endif tmp0 = hc_byte_perm (append, 0, selector); tmp1 = hc_byte_perm (0, append, selector); diff --git a/OpenCL/m23800-pure.cl b/OpenCL/m23800-pure.cl index f6d345677..530c3268d 100644 --- a/OpenCL/m23800-pure.cl +++ b/OpenCL/m23800-pure.cl @@ -56,13 +56,20 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co u32 tmp0; u32 tmp1; - #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC tmp0 = hc_bytealign_be (0, append, func_len); tmp1 = 
hc_bytealign_be (append, 0, func_len); #endif - #ifdef IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV + + #if defined IS_NV const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff; + #endif + + #if (defined IS_AMD || defined IS_HIP) + const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8)); + #endif tmp0 = hc_byte_perm (append, 0, selector); tmp1 = hc_byte_perm (0, append, selector); diff --git a/OpenCL/m25000-pure.cl b/OpenCL/m25000-pure.cl new file mode 100644 index 000000000..249aa95fd --- /dev/null +++ b/OpenCL/m25000-pure.cl @@ -0,0 +1,590 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_md5.cl" +#include "inc_hash_sha1.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +#define SNMPV3_SALT_MAX 1500 +#define SNMPV3_ENGINEID_MAX 34 +#define SNMPV3_MSG_AUTH_PARAMS_LEN 12 +#define SNMPV3_ROUNDS 1048576 +#define SNMPV3_MAX_PW_LENGTH 64 + +#define SNMPV3_TMP_ELEMS 4096 // 4096 = (256 (max pw length) * 64) / sizeof (u32) +#define SNMPV3_HASH_ELEMS_MD5 4 +#define SNMPV3_HASH_ELEMS_SHA1 8 // 8 = aligned 5 + +#define SNMPV3_MAX_SALT_ELEMS 512 // 512 * 4 = 2048 > 1500, also has to be multiple of 64 +#define SNMPV3_MAX_ENGINE_ELEMS 16 // 16 * 4 = 64 > 32, also has to be multiple of 64 +#define SNMPV3_MAX_PNUM_ELEMS 4 // 4 * 4 = 16 > 9 + +typedef struct hmac_md5_tmp +{ + u32 tmp_md5[SNMPV3_TMP_ELEMS]; + u32 tmp_sha1[SNMPV3_TMP_ELEMS]; + + u32 h_md5[SNMPV3_HASH_ELEMS_MD5]; + u32 h_sha1[SNMPV3_HASH_ELEMS_SHA1]; + +} hmac_md5_tmp_t; + +typedef struct snmpv3 +{ + u32 salt_buf[SNMPV3_MAX_SALT_ELEMS]; + u32 salt_len; + + u32 engineID_buf[SNMPV3_MAX_ENGINE_ELEMS]; + u32 engineID_len; + + u32 packet_number[SNMPV3_MAX_PNUM_ELEMS]; + +} snmpv3_t; 
+ +KERNEL_FQ void m25000_init (KERN_ATTR_TMPS_ESALT (hmac_md5_tmp_t, snmpv3_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32 w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + u8 *src_ptr = (u8 *) w; + + // password 64 times, also swapped + + u32 dst_buf[16]; + + u8 *dst_ptr = (u8 *) dst_buf; + + int tmp_idx = 0; + + for (int i = 0; i < 64; i++) + { + for (int j = 0; j < pw_len; j++) + { + const int dst_idx = tmp_idx & 63; + + dst_ptr[dst_idx] = src_ptr[j]; + + // write to global memory every time 64 byte are written into cache + + if (dst_idx == 63) + { + const int tmp_idx4 = (tmp_idx - 63) / 4; + + // md5 + + tmps[gid].tmp_md5[tmp_idx4 + 0] = dst_buf[ 0]; + tmps[gid].tmp_md5[tmp_idx4 + 1] = dst_buf[ 1]; + tmps[gid].tmp_md5[tmp_idx4 + 2] = dst_buf[ 2]; + tmps[gid].tmp_md5[tmp_idx4 + 3] = dst_buf[ 3]; + tmps[gid].tmp_md5[tmp_idx4 + 4] = dst_buf[ 4]; + tmps[gid].tmp_md5[tmp_idx4 + 5] = dst_buf[ 5]; + tmps[gid].tmp_md5[tmp_idx4 + 6] = dst_buf[ 6]; + tmps[gid].tmp_md5[tmp_idx4 + 7] = dst_buf[ 7]; + tmps[gid].tmp_md5[tmp_idx4 + 8] = dst_buf[ 8]; + tmps[gid].tmp_md5[tmp_idx4 + 9] = dst_buf[ 9]; + tmps[gid].tmp_md5[tmp_idx4 + 10] = dst_buf[10]; + tmps[gid].tmp_md5[tmp_idx4 + 11] = dst_buf[11]; + tmps[gid].tmp_md5[tmp_idx4 + 12] = dst_buf[12]; + tmps[gid].tmp_md5[tmp_idx4 + 13] = dst_buf[13]; + tmps[gid].tmp_md5[tmp_idx4 + 14] = dst_buf[14]; + tmps[gid].tmp_md5[tmp_idx4 + 15] = dst_buf[15]; + + // sha1 + + tmps[gid].tmp_sha1[tmp_idx4 + 0] = hc_swap32_S (dst_buf[ 0]); + tmps[gid].tmp_sha1[tmp_idx4 + 1] = hc_swap32_S (dst_buf[ 1]); + tmps[gid].tmp_sha1[tmp_idx4 + 2] = hc_swap32_S (dst_buf[ 2]); + tmps[gid].tmp_sha1[tmp_idx4 + 3] = hc_swap32_S (dst_buf[ 3]); + tmps[gid].tmp_sha1[tmp_idx4 + 4] = hc_swap32_S (dst_buf[ 4]); + tmps[gid].tmp_sha1[tmp_idx4 + 5] = hc_swap32_S (dst_buf[ 5]); + 
tmps[gid].tmp_sha1[tmp_idx4 + 6] = hc_swap32_S (dst_buf[ 6]); + tmps[gid].tmp_sha1[tmp_idx4 + 7] = hc_swap32_S (dst_buf[ 7]); + tmps[gid].tmp_sha1[tmp_idx4 + 8] = hc_swap32_S (dst_buf[ 8]); + tmps[gid].tmp_sha1[tmp_idx4 + 9] = hc_swap32_S (dst_buf[ 9]); + tmps[gid].tmp_sha1[tmp_idx4 + 10] = hc_swap32_S (dst_buf[10]); + tmps[gid].tmp_sha1[tmp_idx4 + 11] = hc_swap32_S (dst_buf[11]); + tmps[gid].tmp_sha1[tmp_idx4 + 12] = hc_swap32_S (dst_buf[12]); + tmps[gid].tmp_sha1[tmp_idx4 + 13] = hc_swap32_S (dst_buf[13]); + tmps[gid].tmp_sha1[tmp_idx4 + 14] = hc_swap32_S (dst_buf[14]); + tmps[gid].tmp_sha1[tmp_idx4 + 15] = hc_swap32_S (dst_buf[15]); + } + + tmp_idx++; + } + } + + // hash md5 + + tmps[gid].h_md5[0] = MD5M_A; + tmps[gid].h_md5[1] = MD5M_B; + tmps[gid].h_md5[2] = MD5M_C; + tmps[gid].h_md5[3] = MD5M_D; + + // hash sha1 + + tmps[gid].h_sha1[0] = SHA1M_A; + tmps[gid].h_sha1[1] = SHA1M_B; + tmps[gid].h_sha1[2] = SHA1M_C; + tmps[gid].h_sha1[3] = SHA1M_D; + tmps[gid].h_sha1[4] = SHA1M_E; +} + +KERNEL_FQ void m25000_loop (KERN_ATTR_TMPS_ESALT (hmac_md5_tmp_t, snmpv3_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 h_md5[4]; + + h_md5[0] = tmps[gid].h_md5[0]; + h_md5[1] = tmps[gid].h_md5[1]; + h_md5[2] = tmps[gid].h_md5[2]; + h_md5[3] = tmps[gid].h_md5[3]; + + u32 h_sha1[5]; + + h_sha1[0] = tmps[gid].h_sha1[0]; + h_sha1[1] = tmps[gid].h_sha1[1]; + h_sha1[2] = tmps[gid].h_sha1[2]; + h_sha1[3] = tmps[gid].h_sha1[3]; + h_sha1[4] = tmps[gid].h_sha1[4]; + + const u32 pw_len = pws[gid].pw_len; + + const int pw_len64 = pw_len * 64; + + #define SNMPV3_TMP_ELEMS_OPT 1024 // 1024 = (64 max pw length * 64) / sizeof (u32) + // for pw length > 64 we use global memory reads + + if (pw_len < 64) + { + u32 tmp_shared[SNMPV3_TMP_ELEMS_OPT]; + + // md5 + + for (int i = 0; i < pw_len64 / 4; i++) + { + tmp_shared[i] = tmps[gid].tmp_md5[i]; + } + + for (int i = 0, j = loop_pos; i < loop_cnt; i += 64, j += 64) + { + const int idx = (j % 
pw_len64) / 4; // the optimization trick is to be able to do this + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = tmp_shared[idx + 0]; + w0[1] = tmp_shared[idx + 1]; + w0[2] = tmp_shared[idx + 2]; + w0[3] = tmp_shared[idx + 3]; + w1[0] = tmp_shared[idx + 4]; + w1[1] = tmp_shared[idx + 5]; + w1[2] = tmp_shared[idx + 6]; + w1[3] = tmp_shared[idx + 7]; + w2[0] = tmp_shared[idx + 8]; + w2[1] = tmp_shared[idx + 9]; + w2[2] = tmp_shared[idx + 10]; + w2[3] = tmp_shared[idx + 11]; + w3[0] = tmp_shared[idx + 12]; + w3[1] = tmp_shared[idx + 13]; + w3[2] = tmp_shared[idx + 14]; + w3[3] = tmp_shared[idx + 15]; + + md5_transform (w0, w1, w2, w3, h_md5); + } + + // sha1 + + for (int i = 0; i < pw_len64 / 4; i++) + { + tmp_shared[i] = tmps[gid].tmp_sha1[i]; + } + + for (int i = 0, j = loop_pos; i < loop_cnt; i += 64, j += 64) + { + const int idx = (j % pw_len64) / 4; // the optimization trick is to be able to do this + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = tmp_shared[idx + 0]; + w0[1] = tmp_shared[idx + 1]; + w0[2] = tmp_shared[idx + 2]; + w0[3] = tmp_shared[idx + 3]; + w1[0] = tmp_shared[idx + 4]; + w1[1] = tmp_shared[idx + 5]; + w1[2] = tmp_shared[idx + 6]; + w1[3] = tmp_shared[idx + 7]; + w2[0] = tmp_shared[idx + 8]; + w2[1] = tmp_shared[idx + 9]; + w2[2] = tmp_shared[idx + 10]; + w2[3] = tmp_shared[idx + 11]; + w3[0] = tmp_shared[idx + 12]; + w3[1] = tmp_shared[idx + 13]; + w3[2] = tmp_shared[idx + 14]; + w3[3] = tmp_shared[idx + 15]; + + sha1_transform (w0, w1, w2, w3, h_sha1); + } + } + else + { + for (int i = 0, j = loop_pos; i < loop_cnt; i += 64, j += 64) + { + const int idx = (j % pw_len64) / 4; // the optimization trick is to be able to do this + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + // md5 + + w0[0] = tmps[gid].tmp_md5[idx + 0]; + w0[1] = tmps[gid].tmp_md5[idx + 1]; + w0[2] = tmps[gid].tmp_md5[idx + 2]; + w0[3] = tmps[gid].tmp_md5[idx + 3]; + w1[0] = tmps[gid].tmp_md5[idx + 4]; + w1[1] = 
tmps[gid].tmp_md5[idx + 5]; + w1[2] = tmps[gid].tmp_md5[idx + 6]; + w1[3] = tmps[gid].tmp_md5[idx + 7]; + w2[0] = tmps[gid].tmp_md5[idx + 8]; + w2[1] = tmps[gid].tmp_md5[idx + 9]; + w2[2] = tmps[gid].tmp_md5[idx + 10]; + w2[3] = tmps[gid].tmp_md5[idx + 11]; + w3[0] = tmps[gid].tmp_md5[idx + 12]; + w3[1] = tmps[gid].tmp_md5[idx + 13]; + w3[2] = tmps[gid].tmp_md5[idx + 14]; + w3[3] = tmps[gid].tmp_md5[idx + 15]; + + md5_transform (w0, w1, w2, w3, h_md5); + + // sha1 + + w0[0] = tmps[gid].tmp_sha1[idx + 0]; + w0[1] = tmps[gid].tmp_sha1[idx + 1]; + w0[2] = tmps[gid].tmp_sha1[idx + 2]; + w0[3] = tmps[gid].tmp_sha1[idx + 3]; + w1[0] = tmps[gid].tmp_sha1[idx + 4]; + w1[1] = tmps[gid].tmp_sha1[idx + 5]; + w1[2] = tmps[gid].tmp_sha1[idx + 6]; + w1[3] = tmps[gid].tmp_sha1[idx + 7]; + w2[0] = tmps[gid].tmp_sha1[idx + 8]; + w2[1] = tmps[gid].tmp_sha1[idx + 9]; + w2[2] = tmps[gid].tmp_sha1[idx + 10]; + w2[3] = tmps[gid].tmp_sha1[idx + 11]; + w3[0] = tmps[gid].tmp_sha1[idx + 12]; + w3[1] = tmps[gid].tmp_sha1[idx + 13]; + w3[2] = tmps[gid].tmp_sha1[idx + 14]; + w3[3] = tmps[gid].tmp_sha1[idx + 15]; + + sha1_transform (w0, w1, w2, w3, h_sha1); + } + } + + tmps[gid].h_md5[0] = h_md5[0]; + tmps[gid].h_md5[1] = h_md5[1]; + tmps[gid].h_md5[2] = h_md5[2]; + tmps[gid].h_md5[3] = h_md5[3]; + + tmps[gid].h_sha1[0] = h_sha1[0]; + tmps[gid].h_sha1[1] = h_sha1[1]; + tmps[gid].h_sha1[2] = h_sha1[2]; + tmps[gid].h_sha1[3] = h_sha1[3]; + tmps[gid].h_sha1[4] = h_sha1[4]; +} + +KERNEL_FQ void m25000_comp (KERN_ATTR_TMPS_ESALT (hmac_md5_tmp_t, snmpv3_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + // md5 + + w0[0] = 0x00000080; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 1048576 * 8; + w3[3] = 0; + + u32 h_md5[4]; + + h_md5[0] = tmps[gid].h_md5[0]; + 
h_md5[1] = tmps[gid].h_md5[1]; + h_md5[2] = tmps[gid].h_md5[2]; + h_md5[3] = tmps[gid].h_md5[3]; + + md5_transform (w0, w1, w2, w3, h_md5); + + // sha1 + + w0[0] = 0x80000000; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 1048576 * 8; + + u32 h_sha1[5]; + + h_sha1[0] = tmps[gid].h_sha1[0]; + h_sha1[1] = tmps[gid].h_sha1[1]; + h_sha1[2] = tmps[gid].h_sha1[2]; + h_sha1[3] = tmps[gid].h_sha1[3]; + h_sha1[4] = tmps[gid].h_sha1[4]; + + sha1_transform (w0, w1, w2, w3, h_sha1); + + md5_ctx_t md5_ctx; + sha1_ctx_t sha1_ctx; + + md5_init (&md5_ctx); + sha1_init (&sha1_ctx); + + u32 w[16]; + + // md5 + + w[ 0] = h_md5[0]; + w[ 1] = h_md5[1]; + w[ 2] = h_md5[2]; + w[ 3] = h_md5[3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + + md5_update (&md5_ctx, w, 16); + + // sha1 + + w[ 0] = h_sha1[0]; + w[ 1] = h_sha1[1]; + w[ 2] = h_sha1[2]; + w[ 3] = h_sha1[3]; + w[ 4] = h_sha1[4]; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + + sha1_update (&sha1_ctx, w, 20); + + // engineID + + md5_update_global (&md5_ctx, esalt_bufs[DIGESTS_OFFSET].engineID_buf, esalt_bufs[DIGESTS_OFFSET].engineID_len); + + sha1_update_global_swap (&sha1_ctx, esalt_bufs[DIGESTS_OFFSET].engineID_buf, esalt_bufs[DIGESTS_OFFSET].engineID_len); + + // md5 + + w[ 0] = h_md5[0]; + w[ 1] = h_md5[1]; + w[ 2] = h_md5[2]; + w[ 3] = h_md5[3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + + md5_update (&md5_ctx, w, 16); + + // sha1 + + w[ 0] = h_sha1[0]; + w[ 1] = h_sha1[1]; + w[ 2] = h_sha1[2]; + w[ 3] = h_sha1[3]; + w[ 4] = h_sha1[4]; + w[ 5] = 0; + w[ 6] = 0; + 
w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + + sha1_update (&sha1_ctx, w, 20); + + md5_final (&md5_ctx); + sha1_final (&sha1_ctx); + + // md5 + + w[ 0] = md5_ctx.h[0]; + w[ 1] = md5_ctx.h[1]; + w[ 2] = md5_ctx.h[2]; + w[ 3] = md5_ctx.h[3]; + w[ 4] = 0; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + + md5_hmac_ctx_t md5_hmac_ctx; + + md5_hmac_init (&md5_hmac_ctx, w, 16); + + md5_hmac_update_global (&md5_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len); + + md5_hmac_final (&md5_hmac_ctx); + + { + const u32 r0 = hc_swap32_S (md5_hmac_ctx.opad.h[DGST_R0]); + const u32 r1 = hc_swap32_S (md5_hmac_ctx.opad.h[DGST_R1]); + const u32 r2 = hc_swap32_S (md5_hmac_ctx.opad.h[DGST_R2]); + const u32 r3 = 0; + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif + } + + // sha1 + + w[ 0] = sha1_ctx.h[0]; + w[ 1] = sha1_ctx.h[1]; + w[ 2] = sha1_ctx.h[2]; + w[ 3] = sha1_ctx.h[3]; + w[ 4] = sha1_ctx.h[4]; + w[ 5] = 0; + w[ 6] = 0; + w[ 7] = 0; + w[ 8] = 0; + w[ 9] = 0; + w[10] = 0; + w[11] = 0; + w[12] = 0; + w[13] = 0; + w[14] = 0; + w[15] = 0; + + sha1_hmac_ctx_t sha1_hmac_ctx; + + sha1_hmac_init (&sha1_hmac_ctx, w, 20); + + sha1_hmac_update_global_swap (&sha1_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len); + + sha1_hmac_final (&sha1_hmac_ctx); + + { + const u32 r0 = sha1_hmac_ctx.opad.h[DGST_R0]; + const u32 r1 = sha1_hmac_ctx.opad.h[DGST_R1]; + const u32 r2 = sha1_hmac_ctx.opad.h[DGST_R2]; + const u32 r3 = 0; + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif + } +} diff --git a/OpenCL/m25100-pure.cl b/OpenCL/m25100-pure.cl index 72cb0ba47..1fc28c664 100644 --- a/OpenCL/m25100-pure.cl +++ b/OpenCL/m25100-pure.cl @@ -18,8 +18,8 @@ #define COMPARE_M "inc_comp_multi.cl" #define SNMPV3_SALT_MAX 1500 
-#define SNMPV3_ENGINEID_MAX 32 -#define SNMPV3_MSG_AUTH_PARAMS_MAX 12 +#define SNMPV3_ENGINEID_MAX 34 +#define SNMPV3_MSG_AUTH_PARAMS_LEN 12 #define SNMPV3_ROUNDS 1048576 #define SNMPV3_MAX_PW_LENGTH 64 diff --git a/OpenCL/m25200-pure.cl b/OpenCL/m25200-pure.cl index f72fce044..e36caaf6e 100644 --- a/OpenCL/m25200-pure.cl +++ b/OpenCL/m25200-pure.cl @@ -18,13 +18,17 @@ #define COMPARE_M "inc_comp_multi.cl" #define SNMPV3_SALT_MAX 1500 -#define SNMPV3_ENGINEID_MAX 32 -#define SNMPV3_MSG_AUTH_PARAMS_MAX 12 +#define SNMPV3_ENGINEID_MAX 34 +#define SNMPV3_MSG_AUTH_PARAMS_LEN 12 #define SNMPV3_ROUNDS 1048576 #define SNMPV3_MAX_PW_LENGTH 64 -#define SNMPV3_TMP_ELEMS 4096 // 4096 = (256 (max pw length) * 64) / sizeof (u32) -#define SNMPV3_HASH_ELEMS 8 // 8 = aligned 5 +#define SNMPV3_TMP_ELEMS 4096 // 4096 = (256 (max pw length) * 64) / sizeof (u32) +#define SNMPV3_HASH_ELEMS 8 // 8 = aligned 5 + +#define SNMPV3_MAX_SALT_ELEMS 512 // 512 * 4 = 2048 > 1500, also has to be multiple of 64 +#define SNMPV3_MAX_ENGINE_ELEMS 16 // 16 * 4 = 64 > 34, also has to be multiple of 64 +#define SNMPV3_MAX_PNUM_ELEMS 4 // 4 * 4 = 16 > 9 typedef struct hmac_sha1_tmp { @@ -33,10 +37,6 @@ typedef struct hmac_sha1_tmp } hmac_sha1_tmp_t; -#define SNMPV3_MAX_SALT_ELEMS 512 // 512 * 4 = 2048 > 1500, also has to be multiple of 64 -#define SNMPV3_MAX_ENGINE_ELEMS 16 // 16 * 4 = 64 > 32, also has to be multiple of 64 -#define SNMPV3_MAX_PNUM_ELEMS 4 // 4 * 4 = 16 > 9 - typedef struct snmpv3 { u32 salt_buf[SNMPV3_MAX_SALT_ELEMS]; diff --git a/docs/changes.txt b/docs/changes.txt index 0de6f097a..c5d3b4ca5 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -5,6 +5,7 @@ ## - Added option --multiply-accel-disable (short: -M) to disable multiply the kernel-accel with the multiprocessor count automatism +- HIP Backend: Added support for HIP 4.4 and later, and added a check to rule out older versions because they are incompatible ## ## Bugs @@ -18,7 +19,10 @@ ## Improvements ## +- AMD GPUs: Add
inline assembly code for md5crypt/sha256crypt, PDF 1.7, 7-Zip, RAR3, Samsung Android and Windows Phone 8+ +- AMD GPUs: On Apple OpenCL platform, we ask for the preferred kernel thread size rather than hard-coding 32 - Blake Kernels: Optimize BLAKE2B_ROUND() 64 bit rotates giving a 5% performance increase +- Blowfish Kernels: Backport optimizations reducing bank conflicts from bcrypt to Password Safe v2 and Open Document Format (ODF) 1.1 - Brain Session: Adds hashconfig specific opti_type and opts_type parameters to hashcat session computation to cover features like -O and -M - Kernel Threads: Use warp size / wavefront size query instead of hardcoded values as base for kernel threads - Shared Memory: Calculate kernel dynamic memory size based on CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN @@ -28,17 +32,23 @@ ## Technical ## +- ADL: Updated support for AMD Display Library to 15.0, updated datatypes and added support for OverDrive 7 and 8 based GPUs +- AMD Driver: Updated requirement for AMD Linux driver to ROCm 4.4 or later because of new HIP Interface +- AMD Driver: Updated requirement for AMD Windows driver to Adrenalin 21.2.1 or later because of new ADL library - Commandline: Throw an error if separator character given by the user with -p option is not exactly 1 byte -- Kernel Cache: Add kernel threads into hash computation which is later used in the kernel cache filename +- ECC secp256k1: Removed the inline assembly code for AMD GPUs because the latest JIT compilers optimize it with the same efficiency - HIP Kernels: Got rid of hip/hip_runtime.h dependancy to enable more easy integration of the HIP backend on Windows +- Kernel Cache: Add kernel threads into hash computation which is later used in the kernel cache filename +- Memory Management: Refactored the code responsible for limiting kernel accel in order to avoid out of -host- memory situations - SCRYPT Kernels: Add more optimized values for some new NV/AMD GPUs ## ## Algorithms ## -- Added 
hash-mode: SNMPv3 HMAC-SHA1-96 +- Added hash-mode: SNMPv3 HMAC-MD5-96/HMAC-SHA1-96 - Added hash-mode: SNMPv3 HMAC-MD5-96 +- Added hash-mode: SNMPv3 HMAC-SHA1-96 - Added hash-mode: SNMPv3 HMAC-SHA224-128 * changes v6.2.2 -> v6.2.3 diff --git a/docs/readme.txt b/docs/readme.txt index b5aad87aa..a144e454a 100644 --- a/docs/readme.txt +++ b/docs/readme.txt @@ -10,8 +10,8 @@ hashcat v6.2.3 ============== -AMD GPUs on Linux require "RadeonOpenCompute (ROCm)" Software Platform (3.1 or later) -AMD GPUs on Windows require "AMD Radeon Adrenalin 2020 Edition" (20.2.2 or later) +AMD GPUs on Linux require "AMD ROCm" (4.4 or later) +AMD GPUs on Windows require "AMD Radeon Adrenalin 2020 Edition" (21.2.1 or later) Intel CPUs require "OpenCL Runtime for Intel Core and Intel Xeon Processors" (16.1.1 or later) NVIDIA GPUs require "NVIDIA Driver" (440.64 or later) and "CUDA Toolkit" (9.0 or later) @@ -156,6 +156,7 @@ NVIDIA GPUs require "NVIDIA Driver" (440.64 or later) and "CUDA Toolkit" (9.0 or - IKE-PSK MD5 - IKE-PSK SHA1 - SNMPv3 HMAC-MD5-96 +- SNMPv3 HMAC-MD5-96/HMAC-SHA1-96 - SNMPv3 HMAC-SHA1-96 - SNMPv3 HMAC-SHA224-128 - WPA-EAPOL-PBKDF2 diff --git a/hashcat.hctune b/hashcat.hctune index 2e1951eef..2b99ee149 100644 --- a/hashcat.hctune +++ b/hashcat.hctune @@ -279,7 +279,14 @@ GeForce_RTX_3090 ALIAS_nv_sm50_or_higher ## Device_738c ALIAS_AMD_MI100 + +AMD_Radeon_(TM)_RX_480_Graphics ALIAS_AMD_RX480 + +Vega_10_XL/XT_[Radeon_RX_Vega_56/64] ALIAS_AMD_Vega64 +AMD_Radeon_Vega_64 ALIAS_AMD_Vega64 + Device_73bf ALIAS_AMD_RX6900XT +AMD_Radeon_RX_6900_XT ALIAS_AMD_RX6900XT ############# ## ENTRIES ## @@ -486,22 +493,41 @@ DEVICE_TYPE_GPU * 14500 1 A ## ## Find the ideal -n value, then store it here along with the proper compute device name. ## Formatting guidelines are availabe at the top of this document. 
+## +## ------------------------------------------------- +## +## You can also ignore all theoretical derivations and semi-automate the process in the real scenario (I prefer this approach): +## +## 1. For example, to find the value for 8900, first create a valid hash for 8900 as follows: +## +## $ ./hashcat --example-hashes -m 8900 | grep Example.Hash | grep -v Format | cut -b 25- > tmp.hash.8900 +## +## 2. Now let it iterate through all -n values to a certain point. In this case, I'm using 200, but in general it's a value that is at least twice the multiprocessor count. If you don't mind, you can just leave it as it is; it just runs a little longer. +## +## $ export i=1; while [ $i -ne 201 ]; do echo $i; ./hashcat --quiet tmp.hash.8900 --keep-guessing --self-test-disable --markov-disable --restore-disable --outfile-autohex-disable --wordlist-autohex-disable --potfile-disable --logfile-disable --hwmon-disable --status --status-timer 1 --runtime 28 --machine-readable --optimized-kernel-enable --workload-profile 3 --hash-type 8900 --attack-mode 3 ?b?b?b?b?b?b?b --backend-devices 1 --force -n $i; i=$(($i+1)); done | tee x +## +## 3. Determine the highest measured H/s speed. But don't just use the highest value. Instead, use the number that seems most stable, usually at the beginning. +## +## $ grep "$(printf 'STATUS\t3')" x | cut -f4 -d$'\t' | sort -n | tail +## +## 4. To match the speed you have chosen to the correct -n value, simply search for that speed in the "x" file. Then go up a little from the block where you found it. The -n value is the single number printed just before the block starts.
If you have multiple blocks at the same speed, choose the lowest value for -n +## ## 4GB -GeForce_GTX_980 * 8900 1 28 A +GeForce_GTX_980 * 8900 1 29 A GeForce_GTX_980 * 9300 1 128 A -GeForce_GTX_980 * 15700 1 28 A -GeForce_GTX_980 * 22700 1 28 A +GeForce_GTX_980 * 15700 1 24 A +GeForce_GTX_980 * 22700 1 29 A ## 8GB -GeForce_GTX_1080 * 8900 1 14 A +GeForce_GTX_1080 * 8900 1 15 A GeForce_GTX_1080 * 9300 1 256 A -GeForce_GTX_1080 * 15700 1 14 A -GeForce_GTX_1080 * 22700 1 14 A +GeForce_GTX_1080 * 15700 1 28 A +GeForce_GTX_1080 * 22700 1 15 A ## 11GB GeForce_RTX_2080_Ti * 8900 1 68 A -GeForce_RTX_2080_Ti * 9300 1 532 A +GeForce_RTX_2080_Ti * 9300 1 528 A GeForce_RTX_2080_Ti * 15700 1 68 A GeForce_RTX_2080_Ti * 22700 1 68 A @@ -509,7 +535,7 @@ GeForce_RTX_2080_Ti * 22700 1 68 GeForce_RTX_3060_Ti * 8900 1 51 A GeForce_RTX_3060_Ti * 9300 1 256 A GeForce_RTX_3060_Ti * 15700 1 11 A -GeForce_RTX_3060_Ti * 22700 1 43 A +GeForce_RTX_3060_Ti * 22700 1 51 A ## 8GB GeForce_RTX_3070 * 8900 1 46 A @@ -517,26 +543,32 @@ GeForce_RTX_3070 * 9300 1 368 GeForce_RTX_3070 * 15700 1 22 A GeForce_RTX_3070 * 22700 1 46 A +## 24GB +GeForce_RTX_3090 * 8900 1 82 A +GeForce_RTX_3090 * 9300 1 984 A +GeForce_RTX_3090 * 15700 1 82 A +GeForce_RTX_3090 * 22700 1 82 A + ## 4GB -AMD_Radeon_(TM)_RX_480_Graphics * 8900 1 14 A -AMD_Radeon_(TM)_RX_480_Graphics * 9300 1 126 A -AMD_Radeon_(TM)_RX_480_Graphics * 15700 1 14 A -AMD_Radeon_(TM)_RX_480_Graphics * 22700 1 14 A +ALIAS_AMD_RX480 * 8900 1 15 A +ALIAS_AMD_RX480 * 9300 1 232 A +ALIAS_AMD_RX480 * 15700 1 58 A +ALIAS_AMD_RX480 * 22700 1 15 A ## 8GB -Vega_10_XL/XT_[Radeon_RX_Vega_56/64] * 8900 1 28 A -Vega_10_XL/XT_[Radeon_RX_Vega_56/64] * 9300 1 442 A -Vega_10_XL/XT_[Radeon_RX_Vega_56/64] * 15700 1 28 A -Vega_10_XL/XT_[Radeon_RX_Vega_56/64] * 22700 1 28 A +ALIAS_AMD_Vega64 * 8900 1 31 A +ALIAS_AMD_Vega64 * 9300 1 440 A +ALIAS_AMD_Vega64 * 15700 1 53 A +ALIAS_AMD_Vega64 * 22700 1 31 A -## 32GB, WF64 -ALIAS_AMD_MI100 * 8900 1 76 A -ALIAS_AMD_MI100 * 9300 1 
288 A -ALIAS_AMD_MI100 * 15700 1 76 A -ALIAS_AMD_MI100 * 22700 1 76 A +## 32GB +ALIAS_AMD_MI100 * 8900 1 79 A +ALIAS_AMD_MI100 * 9300 1 1000 A +ALIAS_AMD_MI100 * 15700 1 120 A +ALIAS_AMD_MI100 * 22700 1 79 A -## 16GB, WF32 -ALIAS_AMD_RX6900XT * 8900 1 62 A -ALIAS_AMD_RX6900XT * 9300 1 704 A -ALIAS_AMD_RX6900XT * 15700 1 62 A -ALIAS_AMD_RX6900XT * 22700 1 62 A +## 16GB +ALIAS_AMD_RX6900XT * 8900 1 59 A +ALIAS_AMD_RX6900XT * 9300 1 720 A +ALIAS_AMD_RX6900XT * 15700 1 56 A +ALIAS_AMD_RX6900XT * 22700 1 59 A diff --git a/include/backend.h b/include/backend.h index 2e41f43c8..b703f0fd7 100644 --- a/include/backend.h +++ b/include/backend.h @@ -89,53 +89,50 @@ int hc_cuLinkAddData (hashcat_ctx_t *hashcat_ctx, CUlinkState state, int hc_cuLinkDestroy (hashcat_ctx_t *hashcat_ctx, CUlinkState state); int hc_cuLinkComplete (hashcat_ctx_t *hashcat_ctx, CUlinkState state, void **cubinOut, size_t *sizeOut); -int hc_nvrtcCreateProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char * const *headers, const char * const *includeNames); -int hc_nvrtcDestroyProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram *prog); -int hc_nvrtcCompileProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, int numOptions, const char * const *options); -int hc_nvrtcGetProgramLogSize (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, size_t *logSizeRet); -int hc_nvrtcGetProgramLog (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, char *log); -int hc_nvrtcGetPTXSize (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, size_t *ptxSizeRet); -int hc_nvrtcGetPTX (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, char *ptx); -int hc_nvrtcVersion (hashcat_ctx_t *hashcat_ctx, int *major, int *minor); +int hc_hipCreateProgram (hashcat_ctx_t *hashcat_ctx, hiprtcProgram *prog, const char *src, const char *name, int numHeaders, const char * const *headers, const char * const *includeNames); +int hc_hipDestroyProgram (hashcat_ctx_t *hashcat_ctx, hiprtcProgram *prog); 
+int hc_hipCompileProgram (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, int numOptions, const char * const *options); +int hc_hipGetProgramLogSize (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, size_t *logSizeRet); +int hc_hipGetProgramLog (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, char *log); +int hc_hipGetCodeSize (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, size_t *codeSizeRet); +int hc_hipGetCode (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, char *code); -int hc_hipCtxCreate (hashcat_ctx_t *hashcat_ctx, HIPcontext *pctx, unsigned int flags, HIPdevice dev); -int hc_hipCtxDestroy (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx); -int hc_hipCtxSetCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx); -int hc_hipCtxSetCacheConfig (hashcat_ctx_t *hashcat_ctx, HIPfunc_cache config); +int hc_hipCtxCreate (hashcat_ctx_t *hashcat_ctx, hipCtx_t *pctx, unsigned int flags, hipDevice_t dev); +int hc_hipCtxDestroy (hashcat_ctx_t *hashcat_ctx, hipCtx_t ctx); +int hc_hipCtxPopCurrent (hashcat_ctx_t *hashcat_ctx, hipCtx_t *pctx); +int hc_hipCtxPushCurrent (hashcat_ctx_t *hashcat_ctx, hipCtx_t ctx); +int hc_hipCtxSetCurrent (hashcat_ctx_t *hashcat_ctx, hipCtx_t ctx); int hc_hipCtxSynchronize (hashcat_ctx_t *hashcat_ctx); -int hc_hipDeviceGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, HIPdevice_attribute attrib, HIPdevice dev); +int hc_hipDeviceGet (hashcat_ctx_t *hashcat_ctx, hipDevice_t *device, int ordinal); +int hc_hipDeviceGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, hipDeviceAttribute_t attrib, hipDevice_t dev); int hc_hipDeviceGetCount (hashcat_ctx_t *hashcat_ctx, int *count); -int hc_hipDeviceGet (hashcat_ctx_t *hashcat_ctx, HIPdevice *device, int ordinal); -int hc_hipDeviceGetName (hashcat_ctx_t *hashcat_ctx, char *name, int len, HIPdevice dev); -int hc_hipDeviceTotalMem (hashcat_ctx_t *hashcat_ctx, size_t *bytes, HIPdevice dev); +int hc_hipDeviceGetName (hashcat_ctx_t *hashcat_ctx, char *name, int len, hipDevice_t dev); +int hc_hipDeviceTotalMem 
(hashcat_ctx_t *hashcat_ctx, size_t *bytes, hipDevice_t dev); int hc_hipDriverGetVersion (hashcat_ctx_t *hashcat_ctx, int *driverVersion); -int hc_hipEventCreate (hashcat_ctx_t *hashcat_ctx, HIPevent *phEvent, unsigned int Flags); -int hc_hipEventDestroy (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent); -int hc_hipEventElapsedTime (hashcat_ctx_t *hashcat_ctx, float *pMilliseconds, HIPevent hStart, HIPevent hEnd); -int hc_hipEventQuery (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent); -int hc_hipEventRecord (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent, HIPstream hStream); -int hc_hipEventSynchronize (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent); -int hc_hipFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, HIPfunction_attribute attrib, HIPfunction hfunc); -//int hc_hipFuncSetAttribute (hashcat_ctx_t *hashcat_ctx, HIPfunction hfunc, HIPfunction_attribute attrib, int value); +int hc_hipEventCreate (hashcat_ctx_t *hashcat_ctx, hipEvent_t *phEvent, unsigned int Flags); +int hc_hipEventDestroy (hashcat_ctx_t *hashcat_ctx, hipEvent_t hEvent); +int hc_hipEventElapsedTime (hashcat_ctx_t *hashcat_ctx, float *pMilliseconds, hipEvent_t hStart, hipEvent_t hEnd); +int hc_hipEventQuery (hashcat_ctx_t *hashcat_ctx, hipEvent_t hEvent); +int hc_hipEventRecord (hashcat_ctx_t *hashcat_ctx, hipEvent_t hEvent, hipStream_t hStream); +int hc_hipEventSynchronize (hashcat_ctx_t *hashcat_ctx, hipEvent_t hEvent); +int hc_hipFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, hipFunction_attribute attrib, hipFunction_t hfunc); int hc_hipInit (hashcat_ctx_t *hashcat_ctx, unsigned int Flags); -int hc_hipLaunchKernel (hashcat_ctx_t *hashcat_ctx, HIPfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, HIPstream hStream, void **kernelParams, void **extra); -int hc_hipMemAlloc (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr *dptr, size_t bytesize); -int hc_hipMemcpyDtoD 
(hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, HIPdeviceptr srcDevice, size_t ByteCount); -int hc_hipMemcpyDtoH (hashcat_ctx_t *hashcat_ctx, void *dstHost, HIPdeviceptr srcDevice, size_t ByteCount); -int hc_hipMemcpyHtoD (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, const void *srcHost, size_t ByteCount); -int hc_hipMemFree (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dptr); -int hc_hipModuleGetFunction (hashcat_ctx_t *hashcat_ctx, HIPfunction *hfunc, HIPmodule hmod, const char *name); -int hc_hipModuleLoadDataEx (hashcat_ctx_t *hashcat_ctx, HIPmodule *module, const void *image, unsigned int numOptions, HIPjit_option *options, void **optionValues); -int hc_hipModuleUnload (hashcat_ctx_t *hashcat_ctx, HIPmodule hmod); -int hc_hipStreamCreate (hashcat_ctx_t *hashcat_ctx, HIPstream *phStream, unsigned int Flags); -int hc_hipStreamDestroy (hashcat_ctx_t *hashcat_ctx, HIPstream hStream); -int hc_hipStreamSynchronize (hashcat_ctx_t *hashcat_ctx, HIPstream hStream); -int hc_hipCtxPushCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx); -int hc_hipCtxPopCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext *pctx); -int hc_hipLinkCreate (hashcat_ctx_t *hashcat_ctx, unsigned int numOptions, HIPjit_option *options, void **optionValues, HIPlinkState *stateOut); -int hc_hipLinkAddData (hashcat_ctx_t *hashcat_ctx, HIPlinkState state, HIPjitInputType type, void *data, size_t size, const char *name, unsigned int numOptions, HIPjit_option *options, void **optionValues); -int hc_hipLinkDestroy (hashcat_ctx_t *hashcat_ctx, HIPlinkState state); -int hc_hipLinkComplete (hashcat_ctx_t *hashcat_ctx, HIPlinkState state, void **cubinOut, size_t *sizeOut); +int hc_hipLaunchKernel (hashcat_ctx_t *hashcat_ctx, hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, hipStream_t hStream, void **kernelParams, void **extra); +int hc_hipMemAlloc (hashcat_ctx_t 
*hashcat_ctx, hipDeviceptr_t *dptr, size_t bytesize); +int hc_hipMemFree (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t dptr); +int hc_hipMemcpyDtoD (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t dstDevice, hipDeviceptr_t srcDevice, size_t ByteCount); +int hc_hipMemcpyDtoHAsync (hashcat_ctx_t *hashcat_ctx, void *dstHost, hipDeviceptr_t srcDevice, size_t ByteCount, hipStream_t hStream); +int hc_hipMemcpyDtoH (hashcat_ctx_t *hashcat_ctx, void *dstHost, hipDeviceptr_t srcDevice, size_t ByteCount); +int hc_hipMemcpyDtoDAsync (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t dstDevice, hipDeviceptr_t srcDevice, size_t ByteCount, hipStream_t hStream); +int hc_hipMemcpyHtoD (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t dstDevice, const void *srcHost, size_t ByteCount); +int hc_hipMemcpyHtoDAsync (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t dstDevice, const void *srcHost, size_t ByteCount, hipStream_t hStream); +int hc_hipModuleGetFunction (hashcat_ctx_t *hashcat_ctx, hipFunction_t *hfunc, hipModule_t hmod, const char *name); +int hc_hipModuleGetGlobal (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t *dptr, size_t *bytes, hipModule_t hmod, const char *name); +int hc_hipModuleLoadDataEx (hashcat_ctx_t *hashcat_ctx, hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues); +int hc_hipModuleUnload (hashcat_ctx_t *hashcat_ctx, hipModule_t hmod); +int hc_hipStreamCreate (hashcat_ctx_t *hashcat_ctx, hipStream_t *phStream, unsigned int Flags); +int hc_hipStreamDestroy (hashcat_ctx_t *hashcat_ctx, hipStream_t hStream); +int hc_hipStreamSynchronize (hashcat_ctx_t *hashcat_ctx, hipStream_t hStream); int hc_clBuildProgram (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, void (CL_CALLBACK *pfn_notify) (cl_program program, void *user_data), void *user_data); int hc_clCompileProgram (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_uint num_devices, const cl_device_id 
*device_list, const char *options, cl_uint num_input_headers, const cl_program *input_headers, const char **header_include_names, void (CL_CALLBACK *pfn_notify) (cl_program program, void *user_data), void *user_data); @@ -184,10 +181,10 @@ int run_cuda_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u32 value, const u64 size); int run_cuda_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 size); -int run_hip_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 num); -int run_hip_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 num); -int run_hip_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u32 value, const u64 size); -int run_hip_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 size); +int run_hip_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 num); +int run_hip_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 num); +int run_hip_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u32 value, const u64 size); +int run_hip_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 size); int run_opencl_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num); int run_opencl_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num); diff --git a/include/ext_ADL.h b/include/ext_ADL.h index fd8438c8e..369a8eb1a 100644 --- a/include/ext_ADL.h +++ 
b/include/ext_ADL.h @@ -13,228 +13,569 @@ #include #endif // _WIN +// Declarations from: +// https://github.com/GPUOpen-LibrariesAndSDKs/display-library/blob/209538e1dc7273f7459411a3a5044ffe2437ed95/include/adl_defines.h +// https://github.com/GPUOpen-LibrariesAndSDKs/display-library/blob/209538e1dc7273f7459411a3a5044ffe2437ed95/include/adl_structures.h + + +/// Defines ADL_TRUE +#define ADL_TRUE 1 +/// Defines ADL_FALSE +#define ADL_FALSE 0 + +//Define Performance Metrics Log max sensors number +#define ADL_PMLOG_MAX_SENSORS 256 + +typedef enum ADLSensorType +{ + SENSOR_MAXTYPES = 0, + PMLOG_CLK_GFXCLK = 1, + PMLOG_CLK_MEMCLK = 2, + PMLOG_CLK_SOCCLK = 3, + PMLOG_CLK_UVDCLK1 = 4, + PMLOG_CLK_UVDCLK2 = 5, + PMLOG_CLK_VCECLK = 6, + PMLOG_CLK_VCNCLK = 7, + PMLOG_TEMPERATURE_EDGE = 8, + PMLOG_TEMPERATURE_MEM = 9, + PMLOG_TEMPERATURE_VRVDDC = 10, + PMLOG_TEMPERATURE_VRMVDD = 11, + PMLOG_TEMPERATURE_LIQUID = 12, + PMLOG_TEMPERATURE_PLX = 13, + PMLOG_FAN_RPM = 14, + PMLOG_FAN_PERCENTAGE = 15, + PMLOG_SOC_VOLTAGE = 16, + PMLOG_SOC_POWER = 17, + PMLOG_SOC_CURRENT = 18, + PMLOG_INFO_ACTIVITY_GFX = 19, + PMLOG_INFO_ACTIVITY_MEM = 20, + PMLOG_GFX_VOLTAGE = 21, + PMLOG_MEM_VOLTAGE = 22, + PMLOG_ASIC_POWER = 23, + PMLOG_TEMPERATURE_VRSOC = 24, + PMLOG_TEMPERATURE_VRMVDD0 = 25, + PMLOG_TEMPERATURE_VRMVDD1 = 26, + PMLOG_TEMPERATURE_HOTSPOT = 27, + PMLOG_TEMPERATURE_GFX = 28, + PMLOG_TEMPERATURE_SOC = 29, + PMLOG_GFX_POWER = 30, + PMLOG_GFX_CURRENT = 31, + PMLOG_TEMPERATURE_CPU = 32, + PMLOG_CPU_POWER = 33, + PMLOG_CLK_CPUCLK = 34, + PMLOG_THROTTLER_STATUS = 35, + PMLOG_CLK_VCN1CLK1 = 36, + PMLOG_CLK_VCN1CLK2 = 37, + PMLOG_SMART_POWERSHIFT_CPU = 38, + PMLOG_SMART_POWERSHIFT_DGPU = 39, + PMLOG_BUS_SPEED = 40, + PMLOG_BUS_LANES = 41, + PMLOG_MAX_SENSORS_REAL +} ADLSensorType; + +/// Defines the maximum string length +#define ADL_MAX_CHAR 4096 +/// Defines the maximum string length +#define ADL_MAX_PATH 256 +/// Defines the maximum number of supported adapters +#define 
ADL_MAX_ADAPTERS 250 +/// Defines the maximum number of supported displays +#define ADL_MAX_DISPLAYS 150 +/// Defines the maximum string length for device name +#define ADL_MAX_DEVICENAME 32 +/// Defines for all adapters +#define ADL_ADAPTER_INDEX_ALL -1 + +/// \defgroup define_adl_results Result Codes +/// This group of definitions are the various results returned by all ADL functions \n +/// @{ +/// All OK, but need to wait +#define ADL_OK_WAIT 4 +/// All OK, but need restart +#define ADL_OK_RESTART 3 +/// All OK but need mode change +#define ADL_OK_MODE_CHANGE 2 +/// All OK, but with warning +#define ADL_OK_WARNING 1 +/// ADL function completed successfully +#define ADL_OK 0 +/// Generic Error. Most likely one or more of the Escape calls to the driver failed! +#define ADL_ERR -1 +/// ADL not initialized +#define ADL_ERR_NOT_INIT -2 +/// One of the parameters passed is invalid +#define ADL_ERR_INVALID_PARAM -3 +/// One of the parameter sizes is invalid +#define ADL_ERR_INVALID_PARAM_SIZE -4 +/// Invalid ADL index passed +#define ADL_ERR_INVALID_ADL_IDX -5 +/// Invalid controller index passed +#define ADL_ERR_INVALID_CONTROLLER_IDX -6 +/// Invalid display index passed +#define ADL_ERR_INVALID_DIPLAY_IDX -7 +/// Function not supported by the driver +#define ADL_ERR_NOT_SUPPORTED -8 +/// Null Pointer error +#define ADL_ERR_NULL_POINTER -9 +/// Call can't be made due to disabled adapter +#define ADL_ERR_DISABLED_ADAPTER -10 +/// Invalid Callback +#define ADL_ERR_INVALID_CALLBACK -11 +/// Display Resource conflict +#define ADL_ERR_RESOURCE_CONFLICT -12 +//Failed to update some of the values. Can be returned by set request that include multiple values if not all values were successfully committed.
+#define ADL_ERR_SET_INCOMPLETE -20 +/// There's no Linux XDisplay in Linux Console environment +#define ADL_ERR_NO_XDISPLAY -21 + +//values for ADLFanSpeedValue.iSpeedType +#define ADL_DL_FANCTRL_SPEED_TYPE_PERCENT 1 +#define ADL_DL_FANCTRL_SPEED_TYPE_RPM 2 + +//values for ADLFanSpeedValue.iFlags +#define ADL_DL_FANCTRL_FLAG_USER_DEFINED_SPEED 1 + /** - * Declarations from adl_sdk.h and subheaders + * Declarations from adl_structures.h */ -#define ADL_OK 0 -#define ADL_ERR -1 -#define ADL_ERR_NOT_SUPPORTED -8 - -#define ADL_MAX_PATH 256 - -#define ADL_DL_FANCTRL_SPEED_TYPE_PERCENT 1 -#define ADL_DL_FANCTRL_FLAG_USER_DEFINED_SPEED 1 - +///////////////////////////////////////////////////////////////////////////////////////////// +///\brief Structure containing information about the graphics adapter. +/// +/// This structure is used to store various information about the graphics adapter. This +/// information can be returned to the user. Alternatively, it can be used to access various driver calls to set +/// or fetch various settings upon the user's request. +/// \nosubgrouping +//////////////////////////////////////////////////////////////////////////////////////////// typedef struct AdapterInfo { - int iSize; - int iAdapterIndex; - char strUDID[ADL_MAX_PATH]; - int iBusNumber; - int iDeviceNumber; - int iFunctionNumber; - int iVendorID; - char strAdapterName[ADL_MAX_PATH]; - char strDisplayName[ADL_MAX_PATH]; - int iPresent; +/// \ALL_STRUCT_MEM - #if defined (_WIN32) || defined (_WIN64) || defined (__CYGWIN__) - int iExist; - char strDriverPath[ADL_MAX_PATH]; - char strDriverPathExt[ADL_MAX_PATH]; - char strPNPString[ADL_MAX_PATH]; - int iOSDisplayIndex; - #endif /* (_WIN32) || (_WIN64) || (__CYGWIN__) */ +/// Size of the structure. + int iSize; +/// The ADL index handle. One GPU may be associated with one or two index handles + int iAdapterIndex; +/// The unique device ID associated with this adapter. 
+ char strUDID[ADL_MAX_PATH]; +/// The BUS number associated with this adapter. + int iBusNumber; +/// The driver number associated with this adapter. + int iDeviceNumber; +/// The function number. + int iFunctionNumber; +/// The vendor ID associated with this adapter. + int iVendorID; +/// Adapter name. + char strAdapterName[ADL_MAX_PATH]; +/// Display name. For example, "\\\\Display0" for Windows or ":0:0" for Linux. + char strDisplayName[ADL_MAX_PATH]; +/// Present or not; 1 if present and 0 if not present. If the logical adapter is present, the display name such as \\\\.\\Display1 can be found from OS + int iPresent; - #if defined (__linux__) - int iXScreenNum; - int iDrvIndex; - char strXScreenConfigName[ADL_MAX_PATH]; - #endif /* (__linux__) */ +#if defined (_WIN32) || defined (_WIN64) +/// \WIN_STRUCT_MEM + +/// Exists or not; 1 if it exists and 0 if not present. + int iExist; +/// Driver registry path. + char strDriverPath[ADL_MAX_PATH]; +/// Driver registry path Ext for. + char strDriverPathExt[ADL_MAX_PATH]; +/// PNP string from Windows. + char strPNPString[ADL_MAX_PATH]; +/// It is generated from EnumDisplayDevices. + int iOSDisplayIndex; + +#endif /* (_WIN32) || (_WIN64) */ + +#if defined (LINUX) +/// \LNX_STRUCT_MEM + +/// Internal X screen number from GPUMapInfo (DEPRECATED, use XScreenInfo) + int iXScreenNum; +/// Internal driver index from GPUMapInfo + int iDrvIndex; +/// \deprecated Internal x config file screen identifier name. Use XScreenInfo instead. + char strXScreenConfigName[ADL_MAX_PATH]; + +#endif /* (LINUX) */ } AdapterInfo, *LPAdapterInfo; +///////////////////////////////////////////////////////////////////////////////////////////// +///\brief Structure containing information about thermal controller. +/// +/// This structure is used to store information about thermal controller. +/// This structure is used by ADL_PM_ThermalDevices_Enum.
+/// \nosubgrouping +//////////////////////////////////////////////////////////////////////////////////////////// typedef struct ADLThermalControllerInfo { +/// Must be set to the size of the structure int iSize; +/// Possible values: \ref ADL_DL_THERMAL_DOMAIN_OTHER or \ref ADL_DL_THERMAL_DOMAIN_GPU. int iThermalDomain; +/// GPU 0, 1, etc. int iDomainIndex; +/// Possible values: \ref ADL_DL_THERMAL_FLAG_INTERRUPT or \ref ADL_DL_THERMAL_FLAG_FANCONTROL int iFlags; } ADLThermalControllerInfo; +///////////////////////////////////////////////////////////////////////////////////////////// +///\brief Structure containing information about thermal controller temperature. +/// +/// This structure is used to store information about thermal controller temperature. +/// This structure is used by the ADL_PM_Temperature_Get() function. +/// \nosubgrouping +//////////////////////////////////////////////////////////////////////////////////////////// typedef struct ADLTemperature { +/// Must be set to the size of the structure int iSize; +/// Temperature in millidegrees Celsius. int iTemperature; } ADLTemperature; +///////////////////////////////////////////////////////////////////////////////////////////// +///\brief Structure containing information about thermal controller fan speed. +/// +/// This structure is used to store information about thermal controller fan speed. +/// This structure is used by the ADL_PM_FanSpeedInfo_Get() function. +/// \nosubgrouping +//////////////////////////////////////////////////////////////////////////////////////////// typedef struct ADLFanSpeedInfo { +/// Must be set to the size of the structure int iSize; +/// \ref define_fanctrl int iFlags; +/// Minimum possible fan speed value in percents. int iMinPercent; +/// Maximum possible fan speed value in percents. int iMaxPercent; +/// Minimum possible fan speed value in RPM. int iMinRPM; +/// Maximum possible fan speed value in RPM.
int iMaxRPM; } ADLFanSpeedInfo; +///////////////////////////////////////////////////////////////////////////////////////////// +///\brief Structure containing information about fan speed reported by thermal controller. +/// +/// This structure is used to store information about fan speed reported by thermal controller. +/// This structure is used by the ADL_Overdrive5_FanSpeed_Get() and ADL_Overdrive5_FanSpeed_Set() functions. +/// \nosubgrouping +//////////////////////////////////////////////////////////////////////////////////////////// typedef struct ADLFanSpeedValue { +/// Must be set to the size of the structure int iSize; +/// Possible values: \ref ADL_DL_FANCTRL_SPEED_TYPE_PERCENT or \ref ADL_DL_FANCTRL_SPEED_TYPE_RPM int iSpeedType; +/// Fan speed value int iFanSpeed; +/// The only flag for now is: \ref ADL_DL_FANCTRL_FLAG_USER_DEFINED_SPEED int iFlags; } ADLFanSpeedValue; -typedef struct ADLDisplayID -{ - int iDisplayLogicalIndex; - int iDisplayPhysicalIndex; - int iDisplayLogicalAdapterIndex; - int iDisplayPhysicalAdapterIndex; -} ADLDisplayID, *LPADLDisplayID; - -typedef struct ADLDisplayInfo -{ - ADLDisplayID displayID; - int iDisplayControllerIndex; - char strDisplayName[ADL_MAX_PATH]; - char strDisplayManufacturerName[ADL_MAX_PATH]; - int iDisplayType; - int iDisplayOutputType; - int iDisplayConnector; - int iDisplayInfoMask; - int iDisplayInfoValue; -} ADLDisplayInfo, *LPADLDisplayInfo; - -typedef struct ADLBiosInfo -{ - char strPartNumber[ADL_MAX_PATH]; - char strVersion[ADL_MAX_PATH]; - char strDate[ADL_MAX_PATH]; -} ADLBiosInfo, *LPADLBiosInfo; - +///////////////////////////////////////////////////////////////////////////////////////////// +///\brief Structure containing information about current power management related activity. +/// +/// This structure is used to store information about current power management related activity. +/// This structure (Overdrive 5 interfaces) is used by the ADL_PM_CurrentActivity_Get() function.
+/// \nosubgrouping +//////////////////////////////////////////////////////////////////////////////////////////// typedef struct ADLPMActivity { - int iSize; - int iEngineClock; - int iMemoryClock; - int iVddc; - int iActivityPercent; - int iCurrentPerformanceLevel; - int iCurrentBusSpeed; - int iCurrentBusLanes; - int iMaximumBusLanes; - int iReserved; +/// Must be set to the size of the structure + int iSize; +/// Current engine clock. + int iEngineClock; +/// Current memory clock. + int iMemoryClock; +/// Current core voltage. + int iVddc; +/// GPU utilization. + int iActivityPercent; +/// Performance level index. + int iCurrentPerformanceLevel; +/// Current PCIE bus speed. + int iCurrentBusSpeed; +/// Number of PCIE bus lanes. + int iCurrentBusLanes; +/// Maximum number of PCIE bus lanes. + int iMaximumBusLanes; +/// Reserved for future purposes. + int iReserved; } ADLPMActivity; +//////////////////////////////////////////////////////////////////////////////////////////// +///\brief Structure containing the range of Overdrive parameter. +/// +/// This structure is used to store information about the range of Overdrive parameter. +/// This structure is used by ADLODParameters. +/// \nosubgrouping +//////////////////////////////////////////////////////////////////////////////////////////// typedef struct ADLODParameterRange { +/// Minimum parameter value. int iMin; +/// Maximum parameter value. int iMax; +/// Parameter step value. int iStep; } ADLODParameterRange; +///////////////////////////////////////////////////////////////////////////////////////////// +///\brief Structure containing information about Overdrive parameters. +/// +/// This structure is used to store information about Overdrive parameters. +/// This structure is used by the ADL_Overdrive5_ODParameters_Get() function. 
+/// \nosubgrouping +//////////////////////////////////////////////////////////////////////////////////////////// typedef struct ADLODParameters { +/// Must be set to the size of the structure int iSize; +/// Number of standard performance states. int iNumberOfPerformanceLevels; +/// Indicates whether the GPU is capable to measure its activity. int iActivityReportingSupported; +/// Indicates whether the GPU supports discrete performance levels or performance range. int iDiscretePerformanceLevels; +/// Reserved for future use. int iReserved; +/// Engine clock range. ADLODParameterRange sEngineClock; +/// Memory clock range. ADLODParameterRange sMemoryClock; +/// Core voltage range. ADLODParameterRange sVddc; } ADLODParameters; +///////////////////////////////////////////////////////////////////////////////////////////// +///\brief Structure containing information about Overdrive 6 fan speed information +/// +/// This structure is used to store information about Overdrive 6 fan speed information +/// \nosubgrouping +//////////////////////////////////////////////////////////////////////////////////////////// +typedef struct ADLOD6FanSpeedInfo +{ + /// Contains a bitmap of the valid fan speed type flags. 
Possible values: \ref ADL_OD6_FANSPEED_TYPE_PERCENT, \ref ADL_OD6_FANSPEED_TYPE_RPM, \ref ADL_OD6_FANSPEED_USER_DEFINED + int iSpeedType; + /// Contains current fan speed in percent (if valid flag exists in iSpeedType) + int iFanSpeedPercent; + /// Contains current fan speed in RPM (if valid flag exists in iSpeedType) + int iFanSpeedRPM; + + /// Value for future extension + int iExtValue; + /// Mask for future extension + int iExtMask; + +} ADLOD6FanSpeedInfo; + +///////////////////////////////////////////////////////////////////////////////////////////// +///\brief Structure containing information about Overdrive 6 fan speed value +/// +/// This structure is used to store information about Overdrive 6 fan speed value +/// \nosubgrouping +//////////////////////////////////////////////////////////////////////////////////////////// +typedef struct ADLOD6FanSpeedValue +{ + /// Indicates the units of the fan speed. Possible values: \ref ADL_OD6_FANSPEED_TYPE_PERCENT, \ref ADL_OD6_FANSPEED_TYPE_RPM + int iSpeedType; + /// Fan speed value (units as indicated above) + int iFanSpeed; + + /// Value for future extension + int iExtValue; + /// Mask for future extension + int iExtMask; + +} ADLOD6FanSpeedValue; + +///////////////////////////////////////////////////////////////////////////////////////////// +///\brief Structure containing information about current Overdrive 6 performance status. +/// +/// This structure is used to store information about current Overdrive 6 performance status. +/// \nosubgrouping +//////////////////////////////////////////////////////////////////////////////////////////// +typedef struct ADLOD6CurrentStatus +{ + /// Current engine clock in 10 KHz. + int iEngineClock; + /// Current memory clock in 10 KHz. + int iMemoryClock; + /// Current GPU activity in percent. This + /// indicates how "busy" the GPU is. + int iActivityPercent; + /// Not used. Reserved for future use. 
+ int iCurrentPerformanceLevel; + /// Current PCI-E bus speed + int iCurrentBusSpeed; + /// Current PCI-E bus # of lanes + int iCurrentBusLanes; + /// Maximum possible PCI-E bus # of lanes + int iMaximumBusLanes; + + /// Value for future extension + int iExtValue; + /// Mask for future extension + int iExtMask; + +} ADLOD6CurrentStatus; + +///////////////////////////////////////////////////////////////////////////////////////////// +///\brief Structure containing information about Overdrive 6 clock range +/// +/// This structure is used to store information about Overdrive 6 clock range +/// \nosubgrouping +//////////////////////////////////////////////////////////////////////////////////////////// +typedef struct ADLOD6ParameterRange +{ + /// The starting value of the clock range + int iMin; + /// The ending value of the clock range + int iMax; + /// The minimum increment between clock values + int iStep; + +} ADLOD6ParameterRange; + +///////////////////////////////////////////////////////////////////////////////////////////// +///\brief Structure containing information about Overdrive 6 capabilities +/// +/// This structure is used to store information about Overdrive 6 capabilities +/// \nosubgrouping +//////////////////////////////////////////////////////////////////////////////////////////// +typedef struct ADLOD6Capabilities +{ + /// Contains a bitmap of the OD6 capability flags. Possible values: \ref ADL_OD6_CAPABILITY_SCLK_CUSTOMIZATION, + /// \ref ADL_OD6_CAPABILITY_MCLK_CUSTOMIZATION, \ref ADL_OD6_CAPABILITY_GPU_ACTIVITY_MONITOR + int iCapabilities; + /// Contains a bitmap indicating the power states + /// supported by OD6. Currently only the performance state + /// is supported. Possible Values: \ref ADL_OD6_SUPPORTEDSTATE_PERFORMANCE + int iSupportedStates; + /// Number of levels. OD6 will always use 2 levels, which describe + /// the minimum to maximum clock ranges. 
+ /// The 1st level indicates the minimum clocks, and the 2nd level + /// indicates the maximum clocks. + int iNumberOfPerformanceLevels; + /// Contains the hard limits of the sclk range. Overdrive + /// clocks cannot be set outside this range. + ADLOD6ParameterRange sEngineClockRange; + /// Contains the hard limits of the mclk range. Overdrive + /// clocks cannot be set outside this range. + ADLOD6ParameterRange sMemoryClockRange; + + /// Value for future extension + int iExtValue; + /// Mask for future extension + int iExtMask; + +} ADLOD6Capabilities; + +///////////////////////////////////////////////////////////////////////////////////////////// +///\brief Structure containing information about Overdrive level. +/// +/// This structure is used to store information about Overdrive level. +/// This structure is used by ADLODPerformanceLevels. +/// \nosubgrouping +//////////////////////////////////////////////////////////////////////////////////////////// typedef struct ADLODPerformanceLevel { +/// Engine clock. int iEngineClock; +/// Memory clock. int iMemoryClock; +/// Core voltage. 
int iVddc; } ADLODPerformanceLevel; -/* - * Attention: we had to change this struct due to an out-of-bound problem mentioned here: - * https://github.com/hashcat/hashcat/issues/244 - * the change: ADLODPerformanceLevel aLevels [1] -> ADLODPerformanceLevel aLevels [2] - */ - -typedef struct ADLODPerformanceLevels -{ - int iSize; - int iReserved; - ADLODPerformanceLevel aLevels [2]; -} ADLODPerformanceLevels; - -typedef struct ADLOD6FanSpeedInfo -{ - int iSpeedType; - int iFanSpeedPercent; - int iFanSpeedRPM; - int iExtValue; - int iExtMask; -} ADLOD6FanSpeedInfo; - -typedef struct ADLOD6FanSpeedValue -{ - int iSpeedType; - int iFanSpeed; - int iExtValue; - int iExtMask; -} ADLOD6FanSpeedValue; - -typedef struct ADLOD6CurrentStatus -{ - int iEngineClock; - int iMemoryClock; - int iActivityPercent; - int iCurrentPerformanceLevel; - int iCurrentBusSpeed; - int iCurrentBusLanes; - int iMaximumBusLanes; - int iExtValue; - int iExtMask; -} ADLOD6CurrentStatus; - -typedef struct ADLOD6ParameterRange -{ - int iMin; - int iMax; - int iStep; -} ADLOD6ParameterRange; - -typedef struct ADLOD6Capabilities -{ - int iCapabilities; - int iSupportedStates; - int iNumberOfPerformanceLevels; - ADLOD6ParameterRange sEngineClockRange; - ADLOD6ParameterRange sMemoryClockRange; - int iExtValue; - int iExtMask; -} ADLOD6Capabilities; - +///////////////////////////////////////////////////////////////////////////////////////////// +///\brief Structure containing information about Overdrive 6 clock values. +/// +/// This structure is used to store information about Overdrive 6 clock values. +/// \nosubgrouping +//////////////////////////////////////////////////////////////////////////////////////////// typedef struct ADLOD6PerformanceLevel { - int iEngineClock; - int iMemoryClock; + /// Engine (core) clock. + int iEngineClock; + /// Memory clock. 
+ int iMemoryClock; + } ADLOD6PerformanceLevel; -/* - * Attention: we had to change this struct due to an out-of-bound problem mentioned here: - * https://github.com/hashcat/hashcat/issues/244 - * the change: ADLOD6PerformanceLevel aLevels [1] -> ADLOD6PerformanceLevel aLevels [2] - */ - +///////////////////////////////////////////////////////////////////////////////////////////// +///\brief Structure containing information about Overdrive 6 clocks. +/// +/// This structure is used to store information about Overdrive 6 clocks. This is a +/// variable-sized structure. iNumberOfPerformanceLevels indicates how many elements +/// are contained in the aLevels array. +/// \nosubgrouping +//////////////////////////////////////////////////////////////////////////////////////////// typedef struct ADLOD6StateInfo { - int iNumberOfPerformanceLevels; - int iExtValue; - int iExtMask; - ADLOD6PerformanceLevel aLevels [2]; + /// Number of levels. OD6 uses clock ranges instead of discrete performance levels. + /// iNumberOfPerformanceLevels is always 2. The 1st level indicates the minimum clocks + /// in the range. The 2nd level indicates the maximum clocks in the range. + int iNumberOfPerformanceLevels; + + /// Value for future extension + int iExtValue; + /// Mask for future extension + int iExtMask; + + /// Variable-sized array of levels. + /// The number of elements in the array is specified by iNumberOfPerformanceLevels. + ADLOD6PerformanceLevel aLevels [1]; + } ADLOD6StateInfo; +///////////////////////////////////////////////////////////////////////////////////////////// +///\brief Structure containing information about Overdrive performance levels. +/// +/// This structure is used to store information about Overdrive performance levels. +/// This structure is used by the ADL_Overdrive5_ODPerformanceLevels_Get() and ADL_Overdrive5_ODPerformanceLevels_Set() functions.
+/// \nosubgrouping +//////////////////////////////////////////////////////////////////////////////////////////// +typedef struct ADLODPerformanceLevels +{ +/// Must be set to sizeof( \ref ADLODPerformanceLevels ) + sizeof( \ref ADLODPerformanceLevel ) * (ADLODParameters.iNumberOfPerformanceLevels - 1) + int iSize; + int iReserved; +/// Array of performance state descriptors. Must have ADLODParameters.iNumberOfPerformanceLevels elements. + ADLODPerformanceLevel aLevels [1]; +} ADLODPerformanceLevels; + +///////////////////////////////////////////////////////////////////////////////////////////// +///\brief Structure containing information about Performance Metrics data +/// +/// This structure is used to store information about Performance Metrics data output +/// \nosubgrouping +//////////////////////////////////////////////////////////////////////////////////////////// +typedef struct ADLSingleSensorData +{ + int supported; + int value; +} ADLSingleSensorData; + +typedef struct ADLPMLogDataOutput +{ + int size; + ADLSingleSensorData sensors[ADL_PMLOG_MAX_SENSORS]; +}ADLPMLogDataOutput; + +/// \brief Handle to ADL client context. +/// +/// ADL clients obtain context handle from initial call to \ref ADL2_Main_Control_Create. 
+/// Clients have to pass the handle to each subsequent ADL call and finally destroy +/// the context with call to \ref ADL2_Main_Control_Destroy +/// \nosubgrouping +typedef void *ADL_CONTEXT_HANDLE; + #if defined (__MSC_VER) #define ADL_API_CALL __cdecl #elif defined (_WIN32) || defined (__WIN32__) @@ -251,62 +592,51 @@ typedef void* (ADL_API_CALL *ADL_MAIN_MALLOC_CALLBACK )( int ); typedef int HM_ADAPTER_ADL; -typedef struct struct_ADLOD6MemClockState -{ - ADLOD6StateInfo state; - ADLOD6PerformanceLevel level; - -} ADLOD6MemClockState; - -typedef int (ADL_API_CALL *ADL_MAIN_CONTROL_DESTROY) (void); -typedef int (ADL_API_CALL *ADL_MAIN_CONTROL_CREATE) (ADL_MAIN_MALLOC_CALLBACK, int); -typedef int (ADL_API_CALL *ADL_ADAPTER_NUMBEROFADAPTERS_GET) (int *); -typedef int (ADL_API_CALL *ADL_ADAPTER_ADAPTERINFO_GET) (LPAdapterInfo, int); -typedef int (ADL_API_CALL *ADL_DISPLAY_DISPLAYINFO_GET) (int, int *, ADLDisplayInfo **, int); -typedef int (ADL_API_CALL *ADL_OVERDRIVE5_TEMPERATURE_GET) (int, int, ADLTemperature *); -typedef int (ADL_API_CALL *ADL_OVERDRIVE6_TEMPERATURE_GET) (int, int *); -typedef int (ADL_API_CALL *ADL_OVERDRIVE5_CURRENTACTIVITY_GET) (int, ADLPMActivity *); -typedef int (ADL_API_CALL *ADL_OVERDRIVE5_THERMALDEVICES_ENUM) (int, int, ADLThermalControllerInfo *); -typedef int (ADL_API_CALL *ADL_ADAPTER_ID_GET) (int, int *); -typedef int (ADL_API_CALL *ADL_ADAPTER_VIDEOBIOSINFO_GET) (int, ADLBiosInfo *); -typedef int (ADL_API_CALL *ADL_OVERDRIVE5_FANSPEEDINFO_GET) (int, int, ADLFanSpeedInfo *); -typedef int (ADL_API_CALL *ADL_OVERDRIVE5_FANSPEED_GET) (int, int, ADLFanSpeedValue *); -typedef int (ADL_API_CALL *ADL_OVERDRIVE6_FANSPEED_GET) (int, ADLOD6FanSpeedInfo *); -typedef int (ADL_API_CALL *ADL_OVERDRIVE5_ODPARAMETERS_GET) (int, ADLODParameters *); -typedef int (ADL_API_CALL *ADL_OVERDRIVE5_ODPERFORMANCELEVELS_GET) (int, int, ADLODPerformanceLevels *); -typedef int (ADL_API_CALL *ADL_ADAPTER_ACTIVE_GET) (int, int *); -typedef int (ADL_API_CALL 
*ADL_OVERDRIVE_CAPS) (int, int *, int *, int *); -typedef int (ADL_API_CALL *ADL_OVERDRIVE6_CURRENTSTATUS_GET) (int, ADLOD6CurrentStatus *); -typedef int (ADL_API_CALL *ADL_OVERDRIVE6_STATEINFO_GET) (int, int, ADLOD6MemClockState *); -typedef int (ADL_API_CALL *ADL_OVERDRIVE6_CAPABILITIES_GET) (int, ADLOD6Capabilities *); -typedef int (ADL_API_CALL *ADL_OVERDRIVE6_TARGETTEMPERATUREDATA_GET) (int, int *, int *); -typedef int (ADL_API_CALL *ADL_OVERDRIVE6_TARGETTEMPERATURERANGEINFO_GET) (int, ADLOD6ParameterRange *); +typedef int (ADL_API_CALL *ADL_ADAPTER_ACTIVE_GET ) ( int, int* ); +typedef int (ADL_API_CALL *ADL_ADAPTER_ADAPTERINFO_GET ) ( LPAdapterInfo, int ); +typedef int (ADL_API_CALL *ADL_ADAPTER_NUMBEROFADAPTERS_GET ) ( int* ); +typedef int (ADL_API_CALL *ADL_MAIN_CONTROL_CREATE )(ADL_MAIN_MALLOC_CALLBACK, int ); +typedef int (ADL_API_CALL *ADL_MAIN_CONTROL_DESTROY )(); +typedef int (ADL_API_CALL *ADL_OVERDRIVE5_CURRENTACTIVITY_GET ) (int iAdapterIndex, ADLPMActivity *lpActivity); +typedef int (ADL_API_CALL *ADL_OVERDRIVE5_FANSPEEDINFO_GET ) (int iAdapterIndex, int iThermalControllerIndex, ADLFanSpeedInfo *lpFanSpeedInfo); +typedef int (ADL_API_CALL *ADL_OVERDRIVE5_FANSPEED_GET ) (int iAdapterIndex, int iThermalControllerIndex, ADLFanSpeedValue *lpFanSpeedValue); +typedef int (ADL_API_CALL *ADL_OVERDRIVE5_ODPARAMETERS_GET ) (int iAdapterIndex, ADLODParameters *lpOdParameters); +typedef int (ADL_API_CALL *ADL_OVERDRIVE5_ODPERFORMANCELEVELS_GET ) (int iAdapterIndex, int iDefault, ADLODPerformanceLevels *lpOdPerformanceLevels); +typedef int (ADL_API_CALL *ADL_OVERDRIVE5_TEMPERATURE_GET ) (int iAdapterIndex, int iThermalControllerIndex, ADLTemperature *lpTemperature); +typedef int (ADL_API_CALL *ADL_OVERDRIVE5_THERMALDEVICES_ENUM ) (int iAdapterIndex, int iThermalControllerIndex, ADLThermalControllerInfo *lpThermalControllerInfo); +typedef int (ADL_API_CALL *ADL_OVERDRIVE6_CAPABILITIES_GET ) (int iAdapterIndex, ADLOD6Capabilities *lpODCapabilities); +typedef int 
(ADL_API_CALL *ADL_OVERDRIVE6_CURRENTSTATUS_GET )(int iAdapterIndex, ADLOD6CurrentStatus *lpCurrentStatus); +typedef int (ADL_API_CALL *ADL_OVERDRIVE6_FANSPEED_GET )(int iAdapterIndex, ADLOD6FanSpeedInfo *lpFanSpeedInfo); +typedef int (ADL_API_CALL *ADL_OVERDRIVE6_STATEINFO_GET )(int iAdapterIndex, int iStateType, ADLOD6StateInfo *lpStateInfo); +typedef int (ADL_API_CALL *ADL_OVERDRIVE6_TEMPERATURE_GET )(int iAdapterIndex, int *lpTemperature); +typedef int (ADL_API_CALL *ADL_OVERDRIVE_CAPS ) (int iAdapterIndex, int *iSupported, int *iEnabled, int *iVersion); +typedef int (ADL_API_CALL *ADL2_OVERDRIVE_CAPS) (ADL_CONTEXT_HANDLE context, int iAdapterIndex, int * iSupported, int * iEnabled, int * iVersion); +typedef int (ADL_API_CALL *ADL2_NEW_QUERYPMLOGDATA_GET) (ADL_CONTEXT_HANDLE, int, ADLPMLogDataOutput*); typedef struct hm_adl_lib { hc_dynlib_t lib; - ADL_MAIN_CONTROL_DESTROY ADL_Main_Control_Destroy; - ADL_MAIN_CONTROL_CREATE ADL_Main_Control_Create; - ADL_ADAPTER_NUMBEROFADAPTERS_GET ADL_Adapter_NumberOfAdapters_Get; + ADL_ADAPTER_ACTIVE_GET ADL_Adapter_Active_Get; ADL_ADAPTER_ADAPTERINFO_GET ADL_Adapter_AdapterInfo_Get; - ADL_DISPLAY_DISPLAYINFO_GET ADL_Display_DisplayInfo_Get; - ADL_ADAPTER_ID_GET ADL_Adapter_ID_Get; - ADL_ADAPTER_VIDEOBIOSINFO_GET ADL_Adapter_VideoBiosInfo_Get; - ADL_OVERDRIVE5_THERMALDEVICES_ENUM ADL_Overdrive5_ThermalDevices_Enum; - ADL_OVERDRIVE5_TEMPERATURE_GET ADL_Overdrive5_Temperature_Get; - ADL_OVERDRIVE6_TEMPERATURE_GET ADL_Overdrive6_Temperature_Get; + ADL_ADAPTER_NUMBEROFADAPTERS_GET ADL_Adapter_NumberOfAdapters_Get; + ADL_MAIN_CONTROL_CREATE ADL_Main_Control_Create; + ADL_MAIN_CONTROL_DESTROY ADL_Main_Control_Destroy; ADL_OVERDRIVE5_CURRENTACTIVITY_GET ADL_Overdrive5_CurrentActivity_Get; ADL_OVERDRIVE5_FANSPEEDINFO_GET ADL_Overdrive5_FanSpeedInfo_Get; ADL_OVERDRIVE5_FANSPEED_GET ADL_Overdrive5_FanSpeed_Get; - ADL_OVERDRIVE6_FANSPEED_GET ADL_Overdrive6_FanSpeed_Get; - ADL_ADAPTER_ACTIVE_GET ADL_Adapter_Active_Get; - 
ADL_OVERDRIVE_CAPS ADL_Overdrive_Caps; + ADL_OVERDRIVE5_ODPARAMETERS_GET ADL_Overdrive5_ODParameters_Get; + ADL_OVERDRIVE5_ODPERFORMANCELEVELS_GET ADL_Overdrive5_ODPerformanceLevels_Get; + ADL_OVERDRIVE5_TEMPERATURE_GET ADL_Overdrive5_Temperature_Get; + ADL_OVERDRIVE5_THERMALDEVICES_ENUM ADL_Overdrive5_ThermalDevices_Enum; ADL_OVERDRIVE6_CAPABILITIES_GET ADL_Overdrive6_Capabilities_Get; - ADL_OVERDRIVE6_STATEINFO_GET ADL_Overdrive6_StateInfo_Get; ADL_OVERDRIVE6_CURRENTSTATUS_GET ADL_Overdrive6_CurrentStatus_Get; - ADL_OVERDRIVE6_TARGETTEMPERATUREDATA_GET ADL_Overdrive6_TargetTemperatureData_Get; - ADL_OVERDRIVE6_TARGETTEMPERATURERANGEINFO_GET ADL_Overdrive6_TargetTemperatureRangeInfo_Get; + ADL_OVERDRIVE6_FANSPEED_GET ADL_Overdrive6_FanSpeed_Get; + ADL_OVERDRIVE6_STATEINFO_GET ADL_Overdrive6_StateInfo_Get; + ADL_OVERDRIVE6_TEMPERATURE_GET ADL_Overdrive6_Temperature_Get; + ADL_OVERDRIVE_CAPS ADL_Overdrive_Caps; + ADL2_OVERDRIVE_CAPS ADL2_Overdrive_Caps; + ADL2_NEW_QUERYPMLOGDATA_GET ADL2_New_QueryPMLogData_Get; } hm_adl_lib_t; @@ -326,6 +656,8 @@ int hm_ADL_Overdrive_CurrentActivity_Get (void *hashcat_ctx, int iAdapterIndex, int hm_ADL_Overdrive5_FanSpeed_Get (void *hashcat_ctx, int iAdapterIndex, int iThermalControllerIndex, ADLFanSpeedValue *lpFanSpeedValue); int hm_ADL_Overdrive6_FanSpeed_Get (void *hashcat_ctx, int iAdapterIndex, ADLOD6FanSpeedInfo *lpFanSpeedInfo); int hm_ADL_Overdrive_Caps (void *hashcat_ctx, int iAdapterIndex, int *od_supported, int *od_enabled, int *od_version); -int hm_ADL_Overdrive6_TargetTemperatureData_Get (void *hashcat_ctx, int iAdapterIndex, int *cur_temp, int *default_temp); +int hm_ADL2_Overdrive_Caps (void *hashcat_ctx, int iAdapterIndex, int *od_supported, int *od_enabled, int *od_version); +int hm_ADL2_New_QueryPMLogData_Get (void *hashcat_ctx, int iAdapterIndex, ADLPMLogDataOutput *lpDataOutput); + #endif // _EXT_ADL_H diff --git a/include/ext_hip.h b/include/ext_hip.h index a99d1e5a9..1477c20c4 100644 --- a/include/ext_hip.h 
+++ b/include/ext_hip.h @@ -6,995 +6,344 @@ #ifndef _EXT_HIP_H #define _EXT_HIP_H -/** - * TODO: FIX ME - */ +// The general Idea with HIP is to use it for AMD GPU since we use CUDA for NV +// Therefore, we need to take certain items, such as hipDeviceptr_t from driver specific paths like amd_driver_types.h +// We just need to keep this in mind in case we need to update these constants from future SDK versions -#define __HIP_API_VERSION 4221131 +// start: amd_driver_types.h -/** - * HIP device pointer - * HIPdeviceptr is defined as an unsigned integer type whose size matches the size of a pointer on the target platform. - */ -#if __HIP_API_VERSION >= 3020 +typedef void* hipDeviceptr_t; -#if defined(_WIN64) || defined(__LP64__) -typedef unsigned long long HIPdeviceptr; +typedef enum hipFunction_attribute { + HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, + HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, + HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES, + HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, + HIP_FUNC_ATTRIBUTE_NUM_REGS, + HIP_FUNC_ATTRIBUTE_PTX_VERSION, + HIP_FUNC_ATTRIBUTE_BINARY_VERSION, + HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA, + HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, + HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT, + HIP_FUNC_ATTRIBUTE_MAX +}hipFunction_attribute; + +// stop: amd_driver_types.h + +// start: hip_runtime_api.h + +typedef int hipDevice_t; +typedef struct ihipCtx_t* hipCtx_t; +typedef struct ihipEvent_t* hipEvent_t; +typedef struct ihipStream_t* hipStream_t; +typedef struct ihipModule_t* hipModule_t; +typedef struct ihipModuleSymbol_t* hipFunction_t; + +// Ignoring error-code return values from hip APIs is discouraged. On C++17, +// we can make that yield a warning +#if __cplusplus >= 201703L +#define __HIP_NODISCARD [[nodiscard]] #else -typedef unsigned int HIPdeviceptr; +#define __HIP_NODISCARD #endif -#endif /* __HIP_API_VERSION >= 3020 */ +typedef enum __HIP_NODISCARD hipError_t { + hipSuccess = 0, ///< Successful completion. 
+ hipErrorInvalidValue = 1, ///< One or more of the parameters passed to the API call is NULL + ///< or not in an acceptable range. + hipErrorOutOfMemory = 2, + // Deprecated + hipErrorMemoryAllocation = 2, ///< Memory allocation error. + hipErrorNotInitialized = 3, + // Deprecated + hipErrorInitializationError = 3, + hipErrorDeinitialized = 4, + hipErrorProfilerDisabled = 5, + hipErrorProfilerNotInitialized = 6, + hipErrorProfilerAlreadyStarted = 7, + hipErrorProfilerAlreadyStopped = 8, + hipErrorInvalidConfiguration = 9, + hipErrorInvalidPitchValue = 12, + hipErrorInvalidSymbol = 13, + hipErrorInvalidDevicePointer = 17, ///< Invalid Device Pointer + hipErrorInvalidMemcpyDirection = 21, ///< Invalid memory copy direction + hipErrorInsufficientDriver = 35, + hipErrorMissingConfiguration = 52, + hipErrorPriorLaunchFailure = 53, + hipErrorInvalidDeviceFunction = 98, + hipErrorNoDevice = 100, ///< Call to hipGetDeviceCount returned 0 devices + hipErrorInvalidDevice = 101, ///< DeviceID must be in range 0...#compute-devices. + hipErrorInvalidImage = 200, + hipErrorInvalidContext = 201, ///< Produced when input context is invalid. + hipErrorContextAlreadyCurrent = 202, + hipErrorMapFailed = 205, + // Deprecated + hipErrorMapBufferObjectFailed = 205, ///< Produced when the IPC memory attach failed from ROCr. 
+ hipErrorUnmapFailed = 206, + hipErrorArrayIsMapped = 207, + hipErrorAlreadyMapped = 208, + hipErrorNoBinaryForGpu = 209, + hipErrorAlreadyAcquired = 210, + hipErrorNotMapped = 211, + hipErrorNotMappedAsArray = 212, + hipErrorNotMappedAsPointer = 213, + hipErrorECCNotCorrectable = 214, + hipErrorUnsupportedLimit = 215, + hipErrorContextAlreadyInUse = 216, + hipErrorPeerAccessUnsupported = 217, + hipErrorInvalidKernelFile = 218, ///< In CUDA DRV, it is CUDA_ERROR_INVALID_PTX + hipErrorInvalidGraphicsContext = 219, + hipErrorInvalidSource = 300, + hipErrorFileNotFound = 301, + hipErrorSharedObjectSymbolNotFound = 302, + hipErrorSharedObjectInitFailed = 303, + hipErrorOperatingSystem = 304, + hipErrorInvalidHandle = 400, + // Deprecated + hipErrorInvalidResourceHandle = 400, ///< Resource handle (hipEvent_t or hipStream_t) invalid. + hipErrorNotFound = 500, + hipErrorNotReady = 600, ///< Indicates that asynchronous operations enqueued earlier are not + ///< ready. This is not actually an error, but is used to distinguish + ///< from hipSuccess (which indicates completion). APIs that return + ///< this error include hipEventQuery and hipStreamQuery. + hipErrorIllegalAddress = 700, + hipErrorLaunchOutOfResources = 701, ///< Out of resources error. + hipErrorLaunchTimeOut = 702, + hipErrorPeerAccessAlreadyEnabled = + 704, ///< Peer access was already enabled from the current device. + hipErrorPeerAccessNotEnabled = + 705, ///< Peer access was never enabled from the current device. + hipErrorSetOnActiveProcess = 708, + hipErrorContextIsDestroyed = 709, + hipErrorAssert = 710, ///< Produced when the kernel calls assert. + hipErrorHostMemoryAlreadyRegistered = + 712, ///< Produced when trying to lock a page-locked memory. + hipErrorHostMemoryNotRegistered = + 713, ///< Produced when trying to unlock a non-page-locked memory. + hipErrorLaunchFailure = + 719, ///< An exception occurred on the device while executing a kernel. 
+ hipErrorCooperativeLaunchTooLarge = + 720, ///< This error indicates that the number of blocks launched per grid for a kernel + ///< that was launched via cooperative launch APIs exceeds the maximum number of + ///< allowed blocks for the current device + hipErrorNotSupported = 801, ///< Produced when the hip API is not supported/implemented + hipErrorStreamCaptureUnsupported = 900, ///< The operation is not permitted when the stream + ///< is capturing. + hipErrorStreamCaptureInvalidated = 901, ///< The current capture sequence on the stream + ///< has been invalidated due to a previous error. + hipErrorStreamCaptureMerge = 902, ///< The operation would have resulted in a merge of + ///< two independent capture sequences. + hipErrorStreamCaptureUnmatched = 903, ///< The capture was not initiated in this stream. + hipErrorStreamCaptureUnjoined = 904, ///< The capture sequence contains a fork that was not + ///< joined to the primary stream. + hipErrorStreamCaptureIsolation = 905, ///< A dependency would have been created which crosses + ///< the capture sequence boundary. Only implicit + ///< in-stream ordering dependencies are allowed + ///< to cross the boundary + hipErrorStreamCaptureImplicit = 906, ///< The operation would have resulted in a disallowed + ///< implicit dependency on a current capture sequence + ///< from hipStreamLegacy. + hipErrorCapturedEvent = 907, ///< The operation is not permitted on an event which was last + ///< recorded in a capturing stream. + hipErrorStreamCaptureWrongThread = 908, ///< A stream capture sequence not initiated with + ///< the hipStreamCaptureModeRelaxed argument to + ///< hipStreamBeginCapture was passed to + ///< hipStreamEndCapture in a different thread. + hipErrorUnknown = 999, ///< Unknown error. + // HSA Runtime Error Codes start here. + hipErrorRuntimeMemory = 1052, ///< HSA runtime memory call returned error. Typically not seen + ///< in production systems.
+ hipErrorRuntimeOther = 1053, ///< HSA runtime call other than memory returned error. Typically + ///< not seen in production systems. + hipErrorTbd ///< Marker that more error codes are needed. +} hipError_t; + +#undef __HIP_NODISCARD + +typedef enum hipDeviceAttribute_t { + hipDeviceAttributeCudaCompatibleBegin = 0, + + hipDeviceAttributeEccEnabled = hipDeviceAttributeCudaCompatibleBegin, ///< Whether ECC support is enabled. + hipDeviceAttributeAccessPolicyMaxWindowSize, ///< Cuda only. The maximum size of the window policy in bytes. + hipDeviceAttributeAsyncEngineCount, ///< Cuda only. Asynchronous engines number. + hipDeviceAttributeCanMapHostMemory, ///< Whether host memory can be mapped into device address space + hipDeviceAttributeCanUseHostPointerForRegisteredMem,///< Cuda only. Device can access host registered memory + ///< at the same virtual address as the CPU + hipDeviceAttributeClockRate, ///< Peak clock frequency in kilohertz. + hipDeviceAttributeComputeMode, ///< Compute mode that device is currently in. + hipDeviceAttributeComputePreemptionSupported, ///< Cuda only. Device supports Compute Preemption. + hipDeviceAttributeConcurrentKernels, ///< Device can possibly execute multiple kernels concurrently. + hipDeviceAttributeConcurrentManagedAccess, ///< Device can coherently access managed memory concurrently with the CPU + hipDeviceAttributeCooperativeLaunch, ///< Support cooperative launch + hipDeviceAttributeCooperativeMultiDeviceLaunch, ///< Support cooperative launch on multiple devices + hipDeviceAttributeDeviceOverlap, ///< Cuda only. Device can concurrently copy memory and execute a kernel. + ///< Deprecated. Use instead asyncEngineCount. + hipDeviceAttributeDirectManagedMemAccessFromHost, ///< Host can directly access managed memory on + ///< the device without migration + hipDeviceAttributeGlobalL1CacheSupported, ///< Cuda only. Device supports caching globals in L1 + hipDeviceAttributeHostNativeAtomicSupported, ///< Cuda only. 
Link between the device and the host supports native atomic operations + hipDeviceAttributeIntegrated, ///< Device is integrated GPU + hipDeviceAttributeIsMultiGpuBoard, ///< Multiple GPU devices. + hipDeviceAttributeKernelExecTimeout, ///< Run time limit for kernels executed on the device + hipDeviceAttributeL2CacheSize, ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache. + hipDeviceAttributeLocalL1CacheSupported, ///< caching locals in L1 is supported + hipDeviceAttributeLuid, ///< Cuda only. 8-byte locally unique identifier in 8 bytes. Undefined on TCC and non-Windows platforms + hipDeviceAttributeLuidDeviceNodeMask, ///< Cuda only. Luid device node mask. Undefined on TCC and non-Windows platforms + hipDeviceAttributeComputeCapabilityMajor, ///< Major compute capability version number. + hipDeviceAttributeManagedMemory, ///< Device supports allocating managed memory on this system + hipDeviceAttributeMaxBlocksPerMultiProcessor, ///< Cuda only. Max block size per multiprocessor + hipDeviceAttributeMaxBlockDimX, ///< Max block size in width. + hipDeviceAttributeMaxBlockDimY, ///< Max block size in height. + hipDeviceAttributeMaxBlockDimZ, ///< Max block size in depth. + hipDeviceAttributeMaxGridDimX, ///< Max grid size in width. + hipDeviceAttributeMaxGridDimY, ///< Max grid size in height. + hipDeviceAttributeMaxGridDimZ, ///< Max grid size in depth. + hipDeviceAttributeMaxSurface1D, ///< Maximum size of 1D surface. + hipDeviceAttributeMaxSurface1DLayered, ///< Cuda only. Maximum dimensions of 1D layered surface. + hipDeviceAttributeMaxSurface2D, ///< Maximum dimension (width, height) of 2D surface. + hipDeviceAttributeMaxSurface2DLayered, ///< Cuda only. Maximum dimensions of 2D layered surface. + hipDeviceAttributeMaxSurface3D, ///< Maximum dimension (width, height, depth) of 3D surface. + hipDeviceAttributeMaxSurfaceCubemap, ///< Cuda only. Maximum dimensions of Cubemap surface. + hipDeviceAttributeMaxSurfaceCubemapLayered, ///< Cuda only. 
Maximum dimension of Cubemap layered surface. + hipDeviceAttributeMaxTexture1DWidth, ///< Maximum size of 1D texture. + hipDeviceAttributeMaxTexture1DLayered, ///< Cuda only. Maximum dimensions of 1D layered texture. + hipDeviceAttributeMaxTexture1DLinear, ///< Maximum number of elements allocatable in a 1D linear texture. + ///< Use cudaDeviceGetTexture1DLinearMaxWidth() instead on Cuda. + hipDeviceAttributeMaxTexture1DMipmap, ///< Cuda only. Maximum size of 1D mipmapped texture. + hipDeviceAttributeMaxTexture2DWidth, ///< Maximum dimension width of 2D texture. + hipDeviceAttributeMaxTexture2DHeight, ///< Maximum dimension height of 2D texture. + hipDeviceAttributeMaxTexture2DGather, ///< Cuda only. Maximum dimensions of 2D texture if gather operations performed. + hipDeviceAttributeMaxTexture2DLayered, ///< Cuda only. Maximum dimensions of 2D layered texture. + hipDeviceAttributeMaxTexture2DLinear, ///< Cuda only. Maximum dimensions (width, height, pitch) of 2D textures bound to pitched memory. + hipDeviceAttributeMaxTexture2DMipmap, ///< Cuda only. Maximum dimensions of 2D mipmapped texture. + hipDeviceAttributeMaxTexture3DWidth, ///< Maximum dimension width of 3D texture. + hipDeviceAttributeMaxTexture3DHeight, ///< Maximum dimension height of 3D texture. + hipDeviceAttributeMaxTexture3DDepth, ///< Maximum dimension depth of 3D texture. + hipDeviceAttributeMaxTexture3DAlt, ///< Cuda only. Maximum dimensions of alternate 3D texture. + hipDeviceAttributeMaxTextureCubemap, ///< Cuda only. Maximum dimensions of Cubemap texture + hipDeviceAttributeMaxTextureCubemapLayered, ///< Cuda only. Maximum dimensions of Cubemap layered texture. + hipDeviceAttributeMaxThreadsDim, ///< Maximum dimension of a block + hipDeviceAttributeMaxThreadsPerBlock, ///< Maximum number of threads per block. + hipDeviceAttributeMaxThreadsPerMultiProcessor, ///< Maximum resident threads per multiprocessor.
+ hipDeviceAttributeMaxPitch, ///< Maximum pitch in bytes allowed by memory copies + hipDeviceAttributeMemoryBusWidth, ///< Global memory bus width in bits. + hipDeviceAttributeMemoryClockRate, ///< Peak memory clock frequency in kilohertz. + hipDeviceAttributeComputeCapabilityMinor, ///< Minor compute capability version number. + hipDeviceAttributeMultiGpuBoardGroupID, ///< Cuda only. Unique ID of device group on the same multi-GPU board + hipDeviceAttributeMultiprocessorCount, ///< Number of multiprocessors on the device. + hipDeviceAttributeName, ///< Device name. + hipDeviceAttributePageableMemoryAccess, ///< Device supports coherently accessing pageable memory + ///< without calling hipHostRegister on it + hipDeviceAttributePageableMemoryAccessUsesHostPageTables, ///< Device accesses pageable memory via the host's page tables + hipDeviceAttributePciBusId, ///< PCI Bus ID. + hipDeviceAttributePciDeviceId, ///< PCI Device ID. + hipDeviceAttributePciDomainID, ///< PCI Domain ID. + hipDeviceAttributePersistingL2CacheMaxSize, ///< Cuda11 only. Maximum l2 persisting lines capacity in bytes + hipDeviceAttributeMaxRegistersPerBlock, ///< 32-bit registers available to a thread block. This number is shared + ///< by all thread blocks simultaneously resident on a multiprocessor. + hipDeviceAttributeMaxRegistersPerMultiprocessor, ///< 32-bit registers available per multiprocessor. + hipDeviceAttributeReservedSharedMemPerBlock, ///< Cuda11 only. Shared memory reserved by CUDA driver per block. + hipDeviceAttributeMaxSharedMemoryPerBlock, ///< Maximum shared memory available per block in bytes. + hipDeviceAttributeSharedMemPerBlockOptin, ///< Cuda only. Maximum shared memory per block usable by special opt in. + hipDeviceAttributeSharedMemPerMultiprocessor, ///< Cuda only. Shared memory available per multiprocessor. + hipDeviceAttributeSingleToDoublePrecisionPerfRatio, ///< Cuda only. Performance ratio of single precision to double precision.
+ hipDeviceAttributeStreamPrioritiesSupported, ///< Cuda only. Whether to support stream priorities. + hipDeviceAttributeSurfaceAlignment, ///< Cuda only. Alignment requirement for surfaces + hipDeviceAttributeTccDriver, ///< Cuda only. Whether device is a Tesla device using TCC driver + hipDeviceAttributeTextureAlignment, ///< Alignment requirement for textures + hipDeviceAttributeTexturePitchAlignment, ///< Pitch alignment requirement for 2D texture references bound to pitched memory; + hipDeviceAttributeTotalConstantMemory, ///< Constant memory size in bytes. + hipDeviceAttributeTotalGlobalMem, ///< Global memory available on device. + hipDeviceAttributeUnifiedAddressing, ///< Cuda only. A unified address space shared with the host. + hipDeviceAttributeUuid, ///< Cuda only. Unique ID in 16 bytes. + hipDeviceAttributeWarpSize, ///< Warp size in threads. + + hipDeviceAttributeCudaCompatibleEnd = 9999, + hipDeviceAttributeAmdSpecificBegin = 10000, + + hipDeviceAttributeClockInstructionRate = hipDeviceAttributeAmdSpecificBegin, ///< Frequency in khz of the timer used by the device-side "clock*" + hipDeviceAttributeArch, ///< Device architecture + hipDeviceAttributeMaxSharedMemoryPerMultiprocessor, ///< Maximum shared memory per multiprocessor.
+ hipDeviceAttributeGcnArch, ///< Device gcn architecture + hipDeviceAttributeGcnArchName, ///< Device gcnArch name in 256 bytes + hipDeviceAttributeHdpMemFlushCntl, ///< Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register + hipDeviceAttributeHdpRegFlushCntl, ///< Address of the HDP_REG_COHERENCY_FLUSH_CNTL register + hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc, ///< Supports cooperative launch on multiple + ///< devices with unmatched functions + hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim, ///< Supports cooperative launch on multiple + ///< devices with unmatched grid dimensions + hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim, ///< Supports cooperative launch on multiple + ///< devices with unmatched block dimensions + hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem, ///< Supports cooperative launch on multiple + ///< devices with unmatched shared memories + hipDeviceAttributeIsLargeBar, ///< Whether it is LargeBar + hipDeviceAttributeAsicRevision, ///< Revision of the GPU in this device + hipDeviceAttributeCanUseStreamWaitValue, ///< '1' if Device supports hipStreamWaitValue32() and + ///< hipStreamWaitValue64() , '0' otherwise. + + hipDeviceAttributeAmdSpecificEnd = 19999, + hipDeviceAttributeVendorSpecificBegin = 20000, + // Extended attributes for vendors +} hipDeviceAttribute_t; + +//! Flags that can be used with hipStreamCreateWithFlags +#define hipStreamDefault \ + 0x00 ///< Default stream creation flags. These are used with hipStreamCreate(). +#define hipStreamNonBlocking 0x01 ///< Stream does not implicitly synchronize with null stream + + +//! Flags that can be used with hipEventCreateWithFlags: +#define hipEventDefault 0x0 ///< Default flags +#define hipEventBlockingSync \ + 0x1 ///< Waiting will yield CPU. Power-friendly and usage-friendly but may increase latency. +#define hipEventDisableTiming \ + 0x2 ///< Disable event's capability to record timing information. May improve performance. 
+#define hipEventInterprocess 0x4 ///< Event can support IPC. @warning - not supported in HIP. +#define hipEventReleaseToDevice \ + 0x40000000 /// < Use a device-scope release when recording this event. This flag is useful to + /// obtain more precise timings of commands between events. The flag is a no-op on + /// CUDA platforms. +#define hipEventReleaseToSystem \ + 0x80000000 /// < Use a system-scope release when recording this event. This flag is + /// useful to make non-coherent host memory visible to the host. The flag is a + /// no-op on CUDA platforms. + + +#define hipDeviceScheduleAuto 0x0 ///< Automatically select between Spin and Yield +#define hipDeviceScheduleSpin \ + 0x1 ///< Dedicate a CPU core to spin-wait. Provides lowest latency, but burns a CPU core and + ///< may consume more power. +#define hipDeviceScheduleYield \ + 0x2 ///< Yield the CPU to the operating system when waiting. May increase latency, but lowers + ///< power and is friendlier to other threads in the system. 
+#define hipDeviceScheduleBlockingSync 0x4 +#define hipDeviceScheduleMask 0x7 +#define hipDeviceMapHost 0x8 +#define hipDeviceLmemResizeToMax 0x16 + +typedef enum hipJitOption { + hipJitOptionMaxRegisters = 0, + hipJitOptionThreadsPerBlock, + hipJitOptionWallTime, + hipJitOptionInfoLogBuffer, + hipJitOptionInfoLogBufferSizeBytes, + hipJitOptionErrorLogBuffer, + hipJitOptionErrorLogBufferSizeBytes, + hipJitOptionOptimizationLevel, + hipJitOptionTargetFromContext, + hipJitOptionTarget, + hipJitOptionFallbackStrategy, + hipJitOptionGenerateDebugInfo, + hipJitOptionLogVerbose, + hipJitOptionGenerateLineInfo, + hipJitOptionCacheMode, + hipJitOptionSm3xOpt, + hipJitOptionFastCompile, + hipJitOptionNumOptions +} hipJitOption; + +// stop: hip_runtime_api.h -typedef int HIPdevice; /**< HIP device */ -typedef struct HIPctx_st *HIPcontext; /**< HIP context */ -typedef struct HIPevent_st *HIPevent; /**< HIP event */ -typedef struct HIPfunc_st *HIPfunction; /**< HIP function */ -typedef struct HIPmod_st *HIPmodule; /**< HIP module */ -typedef struct HIPstream_st *HIPstream; /**< HIP stream */ -typedef struct HIPlinkState_st *HIPlinkState; - - -typedef enum hipError_enum { - /** - * The API call returned with no errors. In the case of query calls, this - * also means that the operation being queried is complete (see - * ::hipEventQuery() and ::hipStreamQuery()). - */ - HIP_SUCCESS = 0, - - /** - * This indicates that one or more of the parameters passed to the API call - * is not within an acceptable range of values. - */ - HIP_ERROR_INVALID_VALUE = 1, - - /** - * The API call failed because it was unable to allocate enough memory to - * perform the requested operation. - */ - HIP_ERROR_OUT_OF_MEMORY = 2, - - /** - * This indicates that the HIP driver has not been initialized with - * ::hipInit() or that initialization has failed. - */ - HIP_ERROR_NOT_INITIALIZED = 3, - - /** - * This indicates that the HIP driver is in the process of shutting down. 
- */ - HIP_ERROR_DEINITIALIZED = 4, - - /** - * This indicates profiler is not initialized for this run. This can - * happen when the application is running with external profiling tools - * like visual profiler. - */ - HIP_ERROR_PROFILER_DISABLED = 5, - - /** - * \deprecated - * This error return is deprecated as of HIP 5.0. It is no longer an error - * to attempt to enable/disable the profiling via ::hipProfilerStart or - * ::hipProfilerStop without initialization. - */ - HIP_ERROR_PROFILER_NOT_INITIALIZED = 6, - - /** - * \deprecated - * This error return is deprecated as of HIP 5.0. It is no longer an error - * to call hipProfilerStart() when profiling is already enabled. - */ - HIP_ERROR_PROFILER_ALREADY_STARTED = 7, - - /** - * \deprecated - * This error return is deprecated as of HIP 5.0. It is no longer an error - * to call hipProfilerStop() when profiling is already disabled. - */ - HIP_ERROR_PROFILER_ALREADY_STOPPED = 8, - - /** - * This indicates that no HIP-capable devices were detected by the installed - * HIP driver. - */ - HIP_ERROR_NO_DEVICE = 100, - - /** - * This indicates that the device ordinal supplied by the user does not - * correspond to a valid HIP device. - */ - HIP_ERROR_INVALID_DEVICE = 101, - - - /** - * This indicates that the device kernel image is invalid. This can also - * indicate an invalid HIP module. - */ - HIP_ERROR_INVALID_IMAGE = 200, - - /** - * This most frequently indicates that there is no context bound to the - * hiprrent thread. This can also be returned if the context passed to an - * API call is not a valid handle (such as a context that has had - * ::hipCtxDestroy() invoked on it). This can also be returned if a user - * mixes different API versions (i.e. 3010 context with 3020 API calls). - * See ::hipCtxGetApiVersion() for more details. - */ - HIP_ERROR_INVALID_CONTEXT = 201, - - /** - * This indicated that the context being supplied as a parameter to the - * API call was already the active context. 
- * \deprecated - * This error return is deprecated as of HIP 3.2. It is no longer an - * error to attempt to push the active context via ::hipCtxPushCurrent(). - */ - HIP_ERROR_CONTEXT_ALREADY_CURRENT = 202, - - /** - * This indicates that a map or register operation has failed. - */ - HIP_ERROR_MAP_FAILED = 205, - - /** - * This indicates that an unmap or unregister operation has failed. - */ - HIP_ERROR_UNMAP_FAILED = 206, - - /** - * This indicates that the specified array is currently mapped and thus - * cannot be destroyed. - */ - HIP_ERROR_ARRAY_IS_MAPPED = 207, - - /** - * This indicates that the resource is already mapped. - */ - HIP_ERROR_ALREADY_MAPPED = 208, - - /** - * This indicates that there is no kernel image available that is suitable - * for the device. This can occur when a user specifies code generation - * options for a particular HIP source file that do not include the - * corresponding device configuration. - */ - HIP_ERROR_NO_BINARY_FOR_GPU = 209, - - /** - * This indicates that a resource has already been acquired. - */ - HIP_ERROR_ALREADY_ACQUIRED = 210, - - /** - * This indicates that a resource is not mapped. - */ - HIP_ERROR_NOT_MAPPED = 211, - - /** - * This indicates that a mapped resource is not available for access as an - * array. - */ - HIP_ERROR_NOT_MAPPED_AS_ARRAY = 212, - - /** - * This indicates that a mapped resource is not available for access as a - * pointer. - */ - HIP_ERROR_NOT_MAPPED_AS_POINTER = 213, - - /** - * This indicates that an uncorrectable ECC error was detected during - * execution. - */ - HIP_ERROR_ECC_UNCORRECTABLE = 214, - - /** - * This indicates that the ::HIPlimit passed to the API call is not - * supported by the active device. - */ - HIP_ERROR_UNSUPPORTED_LIMIT = 215, - - /** - * This indicates that the ::HIPcontext passed to the API call can - * only be bound to a single CPU thread at a time but is already - * bound to a CPU thread. 
- */ - HIP_ERROR_CONTEXT_ALREADY_IN_USE = 216, - - /** - * This indicates that peer access is not supported across the given - * devices. - */ - HIP_ERROR_PEER_ACCESS_UNSUPPORTED = 217, - - /** - * This indicates that a PTX JIT compilation failed. - */ - HIP_ERROR_INVALID_PTX = 218, - - /** - * This indicates an error with OpenGL or DirectX context. - */ - HIP_ERROR_INVALID_GRAPHICS_CONTEXT = 219, - - /** - * This indicates that an uncorrectable NVLink error was detected during the - * execution. - */ - HIP_ERROR_NVLINK_UNCORRECTABLE = 220, - - /** - * This indicates that the PTX JIT compiler library was not found. - */ - HIP_ERROR_JIT_COMPILER_NOT_FOUND = 221, - - /** - * This indicates that the device kernel source is invalid. - */ - HIP_ERROR_INVALID_SOURCE = 300, - - /** - * This indicates that the file specified was not found. - */ - HIP_ERROR_FILE_NOT_FOUND = 301, - - /** - * This indicates that a link to a shared object failed to resolve. - */ - HIP_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302, - - /** - * This indicates that initialization of a shared object failed. - */ - HIP_ERROR_SHARED_OBJECT_INIT_FAILED = 303, - - /** - * This indicates that an OS call failed. - */ - HIP_ERROR_OPERATING_SYSTEM = 304, - - /** - * This indicates that a resource handle passed to the API call was not - * valid. Resource handles are opaque types like ::HIPstream and ::HIPevent. - */ - HIP_ERROR_INVALID_HANDLE = 400, - - /** - * This indicates that a resource required by the API call is not in a - * valid state to perform the requested operation. - */ - HIP_ERROR_ILLEGAL_STATE = 401, - - /** - * This indicates that a named symbol was not found. Examples of symbols - * are global/constant variable names, texture names, and surface names. - */ - HIP_ERROR_NOT_FOUND = 500, - - /** - * This indicates that asynchronous operations issued previously have not - * completed yet. 
This result is not actually an error, but must be indicated - * differently than ::HIP_SUCCESS (which indicates completion). Calls that - * may return this value include ::hipEventQuery() and ::hipStreamQuery(). - */ - HIP_ERROR_NOT_READY = 600, - - /** - * While executing a kernel, the device encountered a - * load or store instruction on an invalid memory address. - * This leaves the process in an inconsistent state and any further HIP work - * will return the same error. To continue using HIP, the process must be terminated - * and relaunched. - */ - HIP_ERROR_ILLEGAL_ADDRESS = 700, - - /** - * This indicates that a launch did not occur because it did not have - * appropriate resources. This error usually indicates that the user has - * attempted to pass too many arguments to the device kernel, or the - * kernel launch specifies too many threads for the kernel's register - * count. Passing arguments of the wrong size (i.e. a 64-bit pointer - * when a 32-bit int is expected) is equivalent to passing too many - * arguments and can also result in this error. - */ - HIP_ERROR_LAUNCH_OUT_OF_RESOURCES = 701, - - /** - * This indicates that the device kernel took too long to execute. This can - * only occur if timeouts are enabled - see the device attribute - * ::HIP_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT for more information. - * This leaves the process in an inconsistent state and any further HIP work - * will return the same error. To continue using HIP, the process must be terminated - * and relaunched. - */ - HIP_ERROR_LAUNCH_TIMEOUT = 702, - - /** - * This error indicates a kernel launch that uses an incompatible texturing - * mode. - */ - HIP_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703, - - /** - * This error indicates that a call to ::hipCtxEnablePeerAccess() is - * trying to re-enable peer access to a context which has already - * had peer access to it enabled. 
- */ - HIP_ERROR_PEER_ACCESS_ALREADY_ENABLED = 704, - - /** - * This error indicates that ::hipCtxDisablePeerAccess() is - * trying to disable peer access which has not been enabled yet - * via ::hipCtxEnablePeerAccess(). - */ - HIP_ERROR_PEER_ACCESS_NOT_ENABLED = 705, - - /** - * This error indicates that the primary context for the specified device - * has already been initialized. - */ - HIP_ERROR_PRIMARY_CONTEXT_ACTIVE = 708, - - /** - * This error indicates that the context hiprrent to the calling thread - * has been destroyed using ::hipCtxDestroy, or is a primary context which - * has not yet been initialized. - */ - HIP_ERROR_CONTEXT_IS_DESTROYED = 709, - - /** - * A device-side assert triggered during kernel execution. The context - * cannot be used anymore, and must be destroyed. All existing device - * memory allocations from this context are invalid and must be - * reconstructed if the program is to continue using HIP. - */ - HIP_ERROR_ASSERT = 710, - - /** - * This error indicates that the hardware resources required to enable - * peer access have been exhausted for one or more of the devices - * passed to ::hipCtxEnablePeerAccess(). - */ - HIP_ERROR_TOO_MANY_PEERS = 711, - - /** - * This error indicates that the memory range passed to ::hipMemHostRegister() - * has already been registered. - */ - HIP_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712, - - /** - * This error indicates that the pointer passed to ::hipMemHostUnregister() - * does not correspond to any currently registered memory region. - */ - HIP_ERROR_HOST_MEMORY_NOT_REGISTERED = 713, - - /** - * While executing a kernel, the device encountered a stack error. - * This can be due to stack corruption or exceeding the stack size limit. - * This leaves the process in an inconsistent state and any further HIP work - * will return the same error. To continue using HIP, the process must be terminated - * and relaunched. 
- */ - HIP_ERROR_HARDWARE_STACK_ERROR = 714, - - /** - * While executing a kernel, the device encountered an illegal instruction. - * This leaves the process in an inconsistent state and any further HIP work - * will return the same error. To continue using HIP, the process must be terminated - * and relaunched. - */ - HIP_ERROR_ILLEGAL_INSTRUCTION = 715, - - /** - * While executing a kernel, the device encountered a load or store instruction - * on a memory address which is not aligned. - * This leaves the process in an inconsistent state and any further HIP work - * will return the same error. To continue using HIP, the process must be terminated - * and relaunched. - */ - HIP_ERROR_MISALIGNED_ADDRESS = 716, - - /** - * While executing a kernel, the device encountered an instruction - * which can only operate on memory locations in certain address spaces - * (global, shared, or local), but was supplied a memory address not - * belonging to an allowed address space. - * This leaves the process in an inconsistent state and any further HIP work - * will return the same error. To continue using HIP, the process must be terminated - * and relaunched. - */ - HIP_ERROR_INVALID_ADDRESS_SPACE = 717, - - /** - * While executing a kernel, the device program counter wrapped its address space. - * This leaves the process in an inconsistent state and any further HIP work - * will return the same error. To continue using HIP, the process must be terminated - * and relaunched. - */ - HIP_ERROR_INVALID_PC = 718, - - /** - * An exception occurred on the device while executing a kernel. Common - * causes include dereferencing an invalid device pointer and accessing - * out of bounds shared memory. Less common cases can be system specific - more - * information about these cases can be found in the system specific user guide. - * This leaves the process in an inconsistent state and any further HIP work - * will return the same error. 
To continue using HIP, the process must be terminated - * and relaunched. - */ - HIP_ERROR_LAUNCH_FAILED = 719, - - /** - * This error indicates that the number of blocks launched per grid for a kernel that was - * launched via either ::hipLaunchCooperativeKernel or ::hipLaunchCooperativeKernelMultiDevice - * exceeds the maximum number of blocks as allowed by ::hipOccupancyMaxActiveBlocksPerMultiprocessor - * or ::hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags times the number of multiprocessors - * as specified by the device attribute ::HIP_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT. - */ - HIP_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE = 720, - - /** - * This error indicates that the attempted operation is not permitted. - */ - HIP_ERROR_NOT_PERMITTED = 800, - - /** - * This error indicates that the attempted operation is not supported - * on the current system or device. - */ - HIP_ERROR_NOT_SUPPORTED = 801, - - /** - * This error indicates that the system is not yet ready to start any HIP - * work. To continue using HIP, verify the system configuration is in a - * valid state and all required driver daemons are actively running. - * More information about this error can be found in the system specific - * user guide. - */ - HIP_ERROR_SYSTEM_NOT_READY = 802, - - /** - * This error indicates that there is a mismatch between the versions of - * the display driver and the HIP driver. Refer to the compatibility documentation - * for supported versions. - */ - HIP_ERROR_SYSTEM_DRIVER_MISMATCH = 803, - - /** - * This error indicates that the system was upgraded to run with forward compatibility - * but the visible hardware detected by HIP does not support this configuration. - * Refer to the compatibility documentation for the supported hardware matrix or ensure - * that only supported hardware is visible during initialization via the HIP_VISIBLE_DEVICES - * environment variable. 
- */ - HIP_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE = 804, - - /** - * This error indicates that the operation is not permitted when - * the stream is capturing. - */ - HIP_ERROR_STREAM_CAPTURE_UNSUPPORTED = 900, - - /** - * This error indicates that the current capture sequence on the stream - * has been invalidated due to a previous error. - */ - HIP_ERROR_STREAM_CAPTURE_INVALIDATED = 901, - - /** - * This error indicates that the operation would have resulted in a merge - * of two independent capture sequences. - */ - HIP_ERROR_STREAM_CAPTURE_MERGE = 902, - - /** - * This error indicates that the capture was not initiated in this stream. - */ - HIP_ERROR_STREAM_CAPTURE_UNMATCHED = 903, - - /** - * This error indicates that the capture sequence contains a fork that was - * not joined to the primary stream. - */ - HIP_ERROR_STREAM_CAPTURE_UNJOINED = 904, - - /** - * This error indicates that a dependency would have been created which - * crosses the capture sequence boundary. Only implicit in-stream ordering - * dependencies are allowed to cross the boundary. - */ - HIP_ERROR_STREAM_CAPTURE_ISOLATION = 905, - - /** - * This error indicates a disallowed implicit dependency on a current capture - * sequence from HIPStreamLegacy. - */ - HIP_ERROR_STREAM_CAPTURE_IMPLICIT = 906, - - /** - * This error indicates that the operation is not permitted on an event which - * was last recorded in a capturing stream. - */ - HIP_ERROR_CAPTURED_EVENT = 907, - - /** - * A stream capture sequence not initiated with the ::HIP_STREAM_CAPTURE_MODE_RELAXED - * argument to ::HIPStreamBeginCapture was passed to ::hipStreamEndCapture in a - * different thread. - */ - HIP_ERROR_STREAM_CAPTURE_WRONG_THREAD = 908, - - /** - * This indicates that an unknown internal error has occurred. 
- */ - HIP_ERROR_UNKNOWN = 999 -} HIPresult; - -/** - * Online compiler and linker options - */ -typedef enum HIPjit_option_enum -{ - /** - * Max number of registers that a thread may use.\n - * Option type: unsigned int\n - * Applies to: compiler only - */ - HIP_JIT_MAX_REGISTERS = 0, - - /** - * IN: Specifies minimum number of threads per block to target compilation - * for\n - * OUT: Returns the number of threads the compiler actually targeted. - * This restricts the resource utilization fo the compiler (e.g. max - * registers) such that a block with the given number of threads should be - * able to launch based on register limitations. Note, this option does not - * currently take into account any other resource limitations, such as - * shared memory utilization.\n - * Cannot be combined with ::HIP_JIT_TARGET.\n - * Option type: unsigned int\n - * Applies to: compiler only - */ - HIP_JIT_THREADS_PER_BLOCK, - - /** - * Overwrites the option value with the total wall clock time, in - * milliseconds, spent in the compiler and linker\n - * Option type: float\n - * Applies to: compiler and linker - */ - HIP_JIT_WALL_TIME, - - /** - * Pointer to a buffer in which to print any log messages - * that are informational in nature (the buffer size is specified via - * option ::HIP_JIT_INFO_LOG_BUFFER_SIZE_BYTES)\n - * Option type: char *\n - * Applies to: compiler and linker - */ - HIP_JIT_INFO_LOG_BUFFER, - - /** - * IN: Log buffer size in bytes. 
Log messages will be capped at this size - * (including null terminator)\n - * OUT: Amount of log buffer filled with messages\n - * Option type: unsigned int\n - * Applies to: compiler and linker - */ - HIP_JIT_INFO_LOG_BUFFER_SIZE_BYTES, - - /** - * Pointer to a buffer in which to print any log messages that - * reflect errors (the buffer size is specified via option - * ::HIP_JIT_ERROR_LOG_BUFFER_SIZE_BYTES)\n - * Option type: char *\n - * Applies to: compiler and linker - */ - HIP_JIT_ERROR_LOG_BUFFER, - - /** - * IN: Log buffer size in bytes. Log messages will be capped at this size - * (including null terminator)\n - * OUT: Amount of log buffer filled with messages\n - * Option type: unsigned int\n - * Applies to: compiler and linker - */ - HIP_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, - - /** - * Level of optimizations to apply to generated code (0 - 4), with 4 - * being the default and highest level of optimizations.\n - * Option type: unsigned int\n - * Applies to: compiler only - */ - HIP_JIT_OPTIMIZATION_LEVEL, - - /** - * No option value required. Determines the target based on the current - * attached context (default)\n - * Option type: No option value needed\n - * Applies to: compiler and linker - */ - HIP_JIT_TARGET_FROM_HIPCONTEXT, - - /** - * Target is chosen based on supplied ::HIPjit_target. Cannot be - * combined with ::HIP_JIT_THREADS_PER_BLOCK.\n - * Option type: unsigned int for enumerated type ::HIPjit_target\n - * Applies to: compiler and linker - */ - HIP_JIT_TARGET, - - /** - * Specifies choice of fallback strategy if matching HIPbin is not found. - * Choice is based on supplied ::HIPjit_fallback. 
This option cannot be - * used with HIPLink* APIs as the linker requires exact matches.\n - * Option type: unsigned int for enumerated type ::HIPjit_fallback\n - * Applies to: compiler only - */ - HIP_JIT_FALLBACK_STRATEGY, - - /** - * Specifies whether to create debug information in output (-g) - * (0: false, default)\n - * Option type: int\n - * Applies to: compiler and linker - */ - HIP_JIT_GENERATE_DEBUG_INFO, - - /** - * Generate verbose log messages (0: false, default)\n - * Option type: int\n - * Applies to: compiler and linker - */ - HIP_JIT_LOG_VERBOSE, - - /** - * Generate line number information (-lineinfo) (0: false, default)\n - * Option type: int\n - * Applies to: compiler only - */ - HIP_JIT_GENERATE_LINE_INFO, - - /** - * Specifies whether to enable caching explicitly (-dlcm) \n - * Choice is based on supplied ::HIPjit_cacheMode_enum.\n - * Option type: unsigned int for enumerated type ::HIPjit_cacheMode_enum\n - * Applies to: compiler only - */ - HIP_JIT_CACHE_MODE, - - /** - * The below jit options are used for internal purposes only, in this version of HIP - */ - HIP_JIT_NEW_SM3X_OPT, - HIP_JIT_FAST_COMPILE, - - /** - * Array of device symbol names that will be relocated to the corresponing - * host addresses stored in ::HIP_JIT_GLOBAL_SYMBOL_ADDRESSES.\n - * Must contain ::HIP_JIT_GLOBAL_SYMBOL_COUNT entries.\n - * When loding a device module, driver will relocate all encountered - * unresolved symbols to the host addresses.\n - * It is only allowed to register symbols that correspond to unresolved - * global variables.\n - * It is illegal to register the same device symbol at multiple addresses.\n - * Option type: const char **\n - * Applies to: dynamic linker only - */ - HIP_JIT_GLOBAL_SYMBOL_NAMES, - - /** - * Array of host addresses that will be used to relocate corresponding - * device symbols stored in ::HIP_JIT_GLOBAL_SYMBOL_NAMES.\n - * Must contain ::HIP_JIT_GLOBAL_SYMBOL_COUNT entries.\n - * Option type: void **\n - * Applies to: 
dynamic linker only - */ - HIP_JIT_GLOBAL_SYMBOL_ADDRESSES, - - /** - * Number of entries in ::HIP_JIT_GLOBAL_SYMBOL_NAMES and - * ::HIP_JIT_GLOBAL_SYMBOL_ADDRESSES arrays.\n - * Option type: unsigned int\n - * Applies to: dynamic linker only - */ - HIP_JIT_GLOBAL_SYMBOL_COUNT, - - HIP_JIT_NUM_OPTIONS - -} HIPjit_option; - - -/** - * Device properties - */ -typedef enum HIPdevice_attribute_enum { - - HIP_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0, /**< Maximum number of threads per block */ - HIP_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 1, /**< Maximum block dimension X */ - HIP_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 2, /**< Maximum block dimension Y */ - HIP_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 3, /**< Maximum block dimension Z */ - HIP_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 4, /**< Maximum grid dimension X */ - HIP_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 5, /**< Maximum grid dimension Y */ - HIP_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 6, /**< Maximum grid dimension Z */ - HIP_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 7, /**< Maximum shared memory available per block in bytes */ - HIP_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 7, /**< Deprecated, use HIP_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK */ - HIP_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 7, /**< Maximum optin shared memory per block */ - HIP_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 8, /**< Memory available on device for __constant__ variables in a HIP C kernel in bytes */ - HIP_DEVICE_ATTRIBUTE_WARP_SIZE = 9, /**< Warp size in threads */ - HIP_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 10, /**< Maximum number of 32-bit registers available per block */ - HIP_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 10, /**< Deprecated, use HIP_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK */ - HIP_DEVICE_ATTRIBUTE_CLOCK_RATE = 11, /**< Typical clock frequency in kilohertz */ - HIP_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 12, /**< Peak memory clock frequency in kilohertz */ - HIP_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 13, /**< Global memory 
bus width in bits */ - HIP_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 14, /**< Number of multiprocessors on device */ - HIP_DEVICE_ATTRIBUTE_COMPUTE_MODE = 15, /**< Compute mode (See ::HIPcomputemode for details) */ - HIP_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 16, /**< Size of L2 cache in bytes */ - HIP_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 17, /**< Maximum resident threads per multiprocessor */ - HIP_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 18, /**< Major compute capability version number */ - HIP_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 19, /**< Minor compute capability version number */ - HIP_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 20, /**< Device can possibly execute multiple kernels concurrently */ - HIP_DEVICE_ATTRIBUTE_PCI_BUS_ID = 21, /**< PCI bus ID of the device */ - HIP_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 22, /**< PCI device ID of the device */ - HIP_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 22, /**< PCI domain ID of the device */ - HIP_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 23, /**< Maximum shared memory available per multiprocessor in bytes */ - HIP_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 24, /**< Device is on a multi-GPU board */ - HIP_DEVICE_ATTRIBUTE_INTEGRATED = 25, /**< Device is integrated with host memory */ - HIP_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = 26, /**< Device supports launching cooperative kernels via ::hipLaunchCooperativeKernel */ - HIP_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = 27, /**< Device can participate in cooperative kernels launched via ::hipLaunchCooperativeKernelMultiDevice */ - HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 28, /**< Maximum 1D texture width */ - HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 29, /**< Maximum 2D texture width */ - HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 30, /**< Maximum 2D texture height */ - HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 31, /**< Maximum 3D texture width */ - HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 32, /**< Maximum 3D texture height */ - 
HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 33, /**< Maximum 3D texture depth */ - - HIP_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 37, /**< Alignment requirement for textures */ - HIP_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 38, /**< Pitch alignment requirement for textures */ - HIP_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 39, /**< Specifies whether there is a run time limit on kernels */ - HIP_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 40, /**< Device can map host memory into HIP address space */ - HIP_DEVICE_ATTRIBUTE_ECC_ENABLED = 41, /**< Device has ECC support enabled */ - - HIP_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 47, /**< Device can allocate managed memory on this system */ - HIP_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = 48, /**< The host can directly access managed memory on the device without migration. */ - HIP_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 49, /**< Device can coherently access managed memory concurrently with the CPU */ - HIP_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 50, /**< Device supports coherently accessing pageable memory without calling HIPHostRegister on it */ - HIP_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 51, /**< Device accesses pageable memory via the host's page tables. */ - HIP_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR = 52, /**< ::HIP_STREAM_WAIT_VALUE_NOR is supported. */ - - - // HIP_DEVICE_ATTRIBUTE_MAX_PITCH = , /**< Maximum pitch in bytes allowed by memory copies */ - // HIP_DEVICE_ATTRIBUTE_GPU_OVERLAP = , /**< Device can possibly copy memory and execute a kernel concurrently. Deprecated. Use instead HIP_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT. 
*/ - // - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = , /**< Maximum 2D layered texture width */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = , /**< Maximum 2D layered texture height */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = , /**< Maximum layers in a 2D layered texture */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = , /**< Deprecated, use HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = , /**< Deprecated, use HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = , /**< Deprecated, use HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS */ - // HIP_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT =, /**< Alignment requirement for surfaces */ - // HIP_DEVICE_ATTRIBUTE_TCC_DRIVER = , /**< Device is using TCC driver model */ - // HIP_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = , /**< Number of asynchronous engines */ - // HIP_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = , /**< Device shares a unified address space with the host */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = , /**< Maximum 1D layered texture width */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = , /**< Maximum layers in a 1D layered texture */ - // HIP_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = , /**< Deprecated, do not use. 
*/ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = , /**< Maximum 2D texture width if HIP_ARRAY3D_TEXTURE_GATHER is set */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = , /**< Maximum 2D texture height if HIP_ARRAY3D_TEXTURE_GATHER is set */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = , /**< Alternate maximum 3D texture width */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = ,/**< Alternate maximum 3D texture height */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = , /**< Alternate maximum 3D texture depth */ - // - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = , /**< Maximum cubemap texture width/height */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = , /**< Maximum cubemap layered texture width/height */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = , /**< Maximum layers in a cubemap layered texture */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = , /**< Maximum 1D surface width */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = , /**< Maximum 2D surface width */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = , /**< Maximum 2D surface height */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = , /**< Maximum 3D surface width */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = , /**< Maximum 3D surface height */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = , /**< Maximum 3D surface depth */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = , /**< Maximum 1D layered surface width */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = , /**< Maximum layers in a 1D layered surface */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = , /**< Maximum 2D layered surface width */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = , /**< Maximum 2D layered surface height */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = , /**< Maximum layers in a 2D layered surface */ - // 
HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = , /**< Maximum cubemap surface width */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = , /**< Maximum cubemap layered surface width */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = , /**< Maximum layers in a cubemap layered surface */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = , /**< Maximum 1D linear texture width */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = , /**< Maximum 2D linear texture width */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = , /**< Maximum 2D linear texture height */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = , /**< Maximum 2D linear texture pitch in bytes */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = , /**< Maximum mipmapped 2D texture width */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = ,/**< Maximum mipmapped 2D texture height */ - // HIP_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = , /**< Maximum mipmapped 1D texture width */ - // HIP_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = , /**< Device supports stream priorities */ - // HIP_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = , /**< Device supports caching globals in L1 */ - // HIP_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = , /**< Device supports caching locals in L1 */ - // HIP_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = , /**< Maximum number of 32-bit registers available per multiprocessor */ - // HIP_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = , /**< Unique id for a group of devices on the same multi-GPU board */ - // HIP_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = , /**< Link between the device and the host supports native atomic operations (this is a placeholder attribute, and is not supported on any current hardware)*/ - // HIP_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = , /**< Ratio of single precision performance (in floating-point operations per second) to 
double precision performance */ - // HIP_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = , /**< Device supports compute preemption. */ - // HIP_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = , /**< Device can access host registered memory at the same virtual address as the CPU */ - // HIP_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS = , /**< ::hipStreamBatchMemOp and related APIs are supported. */ - // HIP_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS = , /**< 64-bit operations are supported in ::hipStreamBatchMemOp and related APIs. */ - // HIP_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES = , /**< Both the ::HIP_STREAM_WAIT_VALUE_FLUSH flag and the ::HIP_STREAM_MEM_OP_FLUSH_REMOTE_WRITES MemOp are supported on the device. See \ref HIP_MEMOP for additional details. */ - // HIP_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED = , /**< Device supports host memory registration via ::HIPHostRegister. */ - // HIP_DEVICE_ATTRIBUTE_MAX -} HIPdevice_attribute; - -/** - * Function cache configurations - */ -typedef enum HIPfunc_cache_enum { - HIP_FUNC_CACHE_PREFER_NONE = 0x00, /**< no preference for shared memory or L1 (default) */ - HIP_FUNC_CACHE_PREFER_SHARED = 0x01, /**< prefer larger shared memory and smaller L1 cache */ - HIP_FUNC_CACHE_PREFER_L1 = 0x02, /**< prefer larger L1 cache and smaller shared memory */ - HIP_FUNC_CACHE_PREFER_EQUAL = 0x03 /**< prefer equal sized L1 cache and shared memory */ -} HIPfunc_cache; - -/** - * Shared memory configurations - */ -typedef enum HIPsharedconfig_enum { - HIP_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE = 0x00, /**< set default shared memory bank size */ - HIP_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE = 0x01, /**< set shared memory bank width to four bytes */ - HIP_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE = 0x02 /**< set shared memory bank width to eight bytes */ -} HIPsharedconfig; - -/** - * Function properties - */ -typedef enum HIPfunction_attribute_enum { - /** - * The maximum number of threads per block, beyond which a launch of the - * 
function would fail. This number depends on both the function and the - * device on which the function is currently loaded. - */ - HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0, - - /** - * The size in bytes of statically-allocated shared memory required by - * this function. This does not include dynamically-allocated shared - * memory requested by the user at runtime. - */ - HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1, - - /** - * The size in bytes of user-allocated constant memory required by this - * function. - */ - HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2, - - /** - * The size in bytes of local memory used by each thread of this function. - */ - HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3, - - /** - * The number of registers used by each thread of this function. - */ - HIP_FUNC_ATTRIBUTE_NUM_REGS = 4, - - /** - * The PTX virtual architecture version for which the function was - * compiled. This value is the major PTX version * 10 + the minor PTX - * version, so a PTX version 1.3 function would return the value 13. - * Note that this may return the undefined value of 0 for cubins - * compiled prior to HIP 3.0. - */ - HIP_FUNC_ATTRIBUTE_PTX_VERSION = 5, - - /** - * The binary architecture version for which the function was compiled. - * This value is the major binary version * 10 + the minor binary version, - * so a binary version 1.3 function would return the value 13. Note that - * this will return a value of 10 for legacy cubins that do not have a - * properly-encoded binary architecture version. - */ - HIP_FUNC_ATTRIBUTE_BINARY_VERSION = 6, - - /** - * The attribute to indicate whether the function has been compiled with - * user specified option "-Xptxas --dlcm=ca" set . - */ - HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA = 7, - - /** - * The maximum size in bytes of dynamically-allocated shared memory that can be used by - * this function. If the user-specified dynamic shared memory size is larger than this - * value, the launch will fail. 
- * See ::hipFuncSetAttribute - */ - HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES = 8, - - /** - * On devices where the L1 cache and shared memory use the same hardware resources, - * this sets the shared memory carveout preference, in percent of the total shared memory. - * Refer to ::HIP_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR. - * This is only a hint, and the driver can choose a different ratio if required to execute the function. - * See ::hipFuncSetAttribute - */ - HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 9, - - HIP_FUNC_ATTRIBUTE_MAX -} HIPfunction_attribute; - -/** - * Context creation flags - */ -typedef enum HIPctx_flags_enum { - HIP_CTX_SCHED_AUTO = 0x00, /**< Automatic scheduling */ - HIP_CTX_SCHED_SPIN = 0x01, /**< Set spin as default scheduling */ - HIP_CTX_SCHED_YIELD = 0x02, /**< Set yield as default scheduling */ - HIP_CTX_SCHED_BLOCKING_SYNC = 0x04, /**< Set blocking synchronization as default scheduling */ - HIP_CTX_BLOCKING_SYNC = 0x04, /**< Set blocking synchronization as default scheduling - * \deprecated This flag was deprecated as of HIP 4.0 - * and was replaced with ::HIP_CTX_SCHED_BLOCKING_SYNC. 
*/ - HIP_CTX_SCHED_MASK = 0x07, - HIP_CTX_MAP_HOST = 0x08, /**< Support mapped pinned allocations */ - HIP_CTX_LMEM_RESIZE_TO_MAX = 0x10, /**< Keep local memory allocation after launch */ - HIP_CTX_FLAGS_MASK = 0x1f -} HIPctx_flags; - -/** - * Stream creation flags - */ -typedef enum HIPstream_flags_enum { - HIP_STREAM_DEFAULT = 0x0, /**< Default stream flag */ - HIP_STREAM_NON_BLOCKING = 0x1 /**< Stream does not synchronize with stream 0 (the NULL stream) */ -} HIPstream_flags; - -/** - * Event creation flags - */ -typedef enum HIPevent_flags_enum { - HIP_EVENT_DEFAULT = 0x0, /**< Default event flag */ - HIP_EVENT_BLOCKING_SYNC = 0x1, /**< Event uses blocking synchronization */ - HIP_EVENT_DISABLE_TIMING = 0x2, /**< Event will not record timing data */ - HIP_EVENT_INTERPROCESS = 0x4 /**< Event is suitable for interprocess use. HIP_EVENT_DISABLE_TIMING must be set */ -} HIPevent_flags; - -typedef enum HIPjitInputType_enum -{ - /** - * Compiled device-class-specific device code\n - * Applicable options: none - */ - HIP_JIT_INPUT_HIPBIN = 0, - - /** - * PTX source code\n - * Applicable options: PTX compiler options - */ - HIP_JIT_INPUT_PTX, - - /** - * Bundle of multiple cubins and/or PTX of some device code\n - * Applicable options: PTX compiler options, ::HIP_JIT_FALLBACK_STRATEGY - */ - HIP_JIT_INPUT_FATBINARY, - - /** - * Host object with embedded device code\n - * Applicable options: PTX compiler options, ::HIP_JIT_FALLBACK_STRATEGY - */ - HIP_JIT_INPUT_OBJECT, - - /** - * Archive of host objects with embedded device code\n - * Applicable options: PTX compiler options, ::HIP_JIT_FALLBACK_STRATEGY - */ - HIP_JIT_INPUT_LIBRARY, - - HIP_JIT_NUM_INPUT_TYPES -} HIPjitInputType; #ifdef _WIN32 #define HIPAPI __stdcall @@ -1004,66 +353,44 @@ typedef enum HIPjitInputType_enum #define HIP_API_CALL HIPAPI -typedef HIPresult (HIP_API_CALL *HIP_HIPCTXCREATE) (HIPcontext *, unsigned int, HIPdevice); -typedef HIPresult (HIP_API_CALL *HIP_HIPCTXDESTROY) (HIPcontext); -typedef 
HIPresult (HIP_API_CALL *HIP_HIPCTXGETCACHECONFIG) (HIPfunc_cache *); -typedef HIPresult (HIP_API_CALL *HIP_HIPCTXGETCURRENT) (HIPcontext *); -typedef HIPresult (HIP_API_CALL *HIP_HIPCTXGETSHAREDMEMCONFIG) (HIPsharedconfig *); -typedef HIPresult (HIP_API_CALL *HIP_HIPCTXPOPCURRENT) (HIPcontext *); -typedef HIPresult (HIP_API_CALL *HIP_HIPCTXPUSHCURRENT) (HIPcontext); -typedef HIPresult (HIP_API_CALL *HIP_HIPCTXSETCACHECONFIG) (HIPfunc_cache); -typedef HIPresult (HIP_API_CALL *HIP_HIPCTXSETCURRENT) (HIPcontext); -typedef HIPresult (HIP_API_CALL *HIP_HIPCTXSETSHAREDMEMCONFIG) (HIPsharedconfig); -typedef HIPresult (HIP_API_CALL *HIP_HIPCTXSYNCHRONIZE) (); -typedef HIPresult (HIP_API_CALL *HIP_HIPDEVICEGETATTRIBUTE) (int *, HIPdevice_attribute, HIPdevice); -typedef HIPresult (HIP_API_CALL *HIP_HIPDEVICEGETCOUNT) (int *); -typedef HIPresult (HIP_API_CALL *HIP_HIPDEVICEGET) (HIPdevice *, int); -typedef HIPresult (HIP_API_CALL *HIP_HIPDEVICEGETNAME) (char *, int, HIPdevice); -typedef HIPresult (HIP_API_CALL *HIP_HIPDEVICETOTALMEM) (size_t *, HIPdevice); -typedef HIPresult (HIP_API_CALL *HIP_HIPDRIVERGETVERSION) (int *); -typedef HIPresult (HIP_API_CALL *HIP_HIPEVENTCREATE) (HIPevent *, unsigned int); -typedef HIPresult (HIP_API_CALL *HIP_HIPEVENTDESTROY) (HIPevent); -typedef HIPresult (HIP_API_CALL *HIP_HIPEVENTELAPSEDTIME) (float *, HIPevent, HIPevent); -typedef HIPresult (HIP_API_CALL *HIP_HIPEVENTQUERY) (HIPevent); -typedef HIPresult (HIP_API_CALL *HIP_HIPEVENTRECORD) (HIPevent, HIPstream); -typedef HIPresult (HIP_API_CALL *HIP_HIPEVENTSYNCHRONIZE) (HIPevent); -typedef HIPresult (HIP_API_CALL *HIP_HIPFUNCGETATTRIBUTE) (int *, HIPfunction_attribute, HIPfunction); -typedef HIPresult (HIP_API_CALL *HIP_HIPFUNCSETATTRIBUTE) (HIPfunction, HIPfunction_attribute, int); -typedef HIPresult (HIP_API_CALL *HIP_HIPFUNCSETCACHECONFIG) (HIPfunction, HIPfunc_cache); -typedef HIPresult (HIP_API_CALL *HIP_HIPFUNCSETSHAREDMEMCONFIG) (HIPfunction, HIPsharedconfig); -typedef HIPresult 
(HIP_API_CALL *HIP_HIPGETERRORNAME) (HIPresult, const char **); -typedef HIPresult (HIP_API_CALL *HIP_HIPGETERRORSTRING) (HIPresult, const char **); -typedef HIPresult (HIP_API_CALL *HIP_HIPINIT) (unsigned int); -typedef HIPresult (HIP_API_CALL *HIP_HIPLAUNCHKERNEL) (HIPfunction, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, HIPstream, void **, void **); -typedef HIPresult (HIP_API_CALL *HIP_HIPMEMALLOC) (HIPdeviceptr *, size_t); -typedef HIPresult (HIP_API_CALL *HIP_HIPMEMALLOCHOST) (void **, size_t); -typedef HIPresult (HIP_API_CALL *HIP_HIPMEMCPYDTOD) (HIPdeviceptr, HIPdeviceptr, size_t); -typedef HIPresult (HIP_API_CALL *HIP_HIPMEMCPYDTODASYNC) (HIPdeviceptr, HIPdeviceptr, size_t, HIPstream); -typedef HIPresult (HIP_API_CALL *HIP_HIPMEMCPYDTOH) (void *, HIPdeviceptr, size_t); -typedef HIPresult (HIP_API_CALL *HIP_HIPMEMCPYDTOHASYNC) (void *, HIPdeviceptr, size_t, HIPstream); -typedef HIPresult (HIP_API_CALL *HIP_HIPMEMCPYHTOD) (HIPdeviceptr, const void *, size_t); -typedef HIPresult (HIP_API_CALL *HIP_HIPMEMCPYHTODASYNC) (HIPdeviceptr, const void *, size_t, HIPstream); -typedef HIPresult (HIP_API_CALL *HIP_HIPMEMFREE) (HIPdeviceptr); -typedef HIPresult (HIP_API_CALL *HIP_HIPMEMFREEHOST) (void *); -typedef HIPresult (HIP_API_CALL *HIP_HIPMEMGETINFO) (size_t *, size_t *); -typedef HIPresult (HIP_API_CALL *HIP_HIPMEMSETD32) (HIPdeviceptr, unsigned int, size_t); -typedef HIPresult (HIP_API_CALL *HIP_HIPMEMSETD8) (HIPdeviceptr, unsigned char, size_t); -typedef HIPresult (HIP_API_CALL *HIP_HIPMODULEGETFUNCTION) (HIPfunction *, HIPmodule, const char *); -typedef HIPresult (HIP_API_CALL *HIP_HIPMODULEGETGLOBAL) (HIPdeviceptr *, size_t *, HIPmodule, const char *); -typedef HIPresult (HIP_API_CALL *HIP_HIPMODULELOAD) (HIPmodule *, const char *); -typedef HIPresult (HIP_API_CALL *HIP_HIPMODULELOADDATA) (HIPmodule *, const void *); -typedef HIPresult (HIP_API_CALL *HIP_HIPMODULELOADDATAEX) (HIPmodule *, const void *, 
unsigned int, HIPjit_option *, void **); -typedef HIPresult (HIP_API_CALL *HIP_HIPMODULEUNLOAD) (HIPmodule); -typedef HIPresult (HIP_API_CALL *HIP_HIPPROFILERSTART) (); -typedef HIPresult (HIP_API_CALL *HIP_HIPPROFILERSTOP) (); -typedef HIPresult (HIP_API_CALL *HIP_HIPSTREAMCREATE) (HIPstream *, unsigned int); -typedef HIPresult (HIP_API_CALL *HIP_HIPSTREAMDESTROY) (HIPstream); -typedef HIPresult (HIP_API_CALL *HIP_HIPSTREAMSYNCHRONIZE) (HIPstream); -typedef HIPresult (HIP_API_CALL *HIP_HIPSTREAMWAITEVENT) (HIPstream, HIPevent, unsigned int); -typedef HIPresult (HIP_API_CALL *HIP_HIPLINKCREATE) (unsigned int, HIPjit_option *, void **, HIPlinkState *); -typedef HIPresult (HIP_API_CALL *HIP_HIPLINKADDDATA) (HIPlinkState, HIPjitInputType, void *, size_t, const char *, unsigned int, HIPjit_option *, void **); -typedef HIPresult (HIP_API_CALL *HIP_HIPLINKDESTROY) (HIPlinkState); -typedef HIPresult (HIP_API_CALL *HIP_HIPLINKCOMPLETE) (HIPlinkState, void **, size_t *); +typedef hipError_t (HIP_API_CALL *HIP_HIPCTXCREATE) (hipCtx_t *, unsigned int, hipDevice_t); +typedef hipError_t (HIP_API_CALL *HIP_HIPCTXDESTROY) (hipCtx_t); +typedef hipError_t (HIP_API_CALL *HIP_HIPCTXPOPCURRENT) (hipCtx_t *); +typedef hipError_t (HIP_API_CALL *HIP_HIPCTXPUSHCURRENT) (hipCtx_t); +typedef hipError_t (HIP_API_CALL *HIP_HIPCTXSETCURRENT) (hipCtx_t); +typedef hipError_t (HIP_API_CALL *HIP_HIPCTXSYNCHRONIZE) (); +typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICEGETATTRIBUTE) (int *, hipDeviceAttribute_t, hipDevice_t); +typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICEGETCOUNT) (int *); +typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICEGET) (hipDevice_t *, int); +typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICEGETNAME) (char *, int, hipDevice_t); +typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICETOTALMEM) (size_t *, hipDevice_t); +typedef hipError_t (HIP_API_CALL *HIP_HIPDRIVERGETVERSION) (int *); +typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTCREATE) (hipEvent_t *, unsigned int); +typedef hipError_t 
(HIP_API_CALL *HIP_HIPEVENTDESTROY) (hipEvent_t); +typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTELAPSEDTIME) (float *, hipEvent_t, hipEvent_t); +typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTRECORD) (hipEvent_t, hipStream_t); +typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTSYNCHRONIZE) (hipEvent_t); +typedef hipError_t (HIP_API_CALL *HIP_HIPFUNCGETATTRIBUTE) (int *, hipFunction_attribute, hipFunction_t); +typedef hipError_t (HIP_API_CALL *HIP_HIPGETERRORNAME) (hipError_t, const char **); +typedef hipError_t (HIP_API_CALL *HIP_HIPGETERRORSTRING) (hipError_t, const char **); +typedef hipError_t (HIP_API_CALL *HIP_HIPINIT) (unsigned int); +typedef hipError_t (HIP_API_CALL *HIP_HIPLAUNCHKERNEL) (hipFunction_t, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, hipStream_t, void **, void **); +typedef hipError_t (HIP_API_CALL *HIP_HIPMEMALLOC) (hipDeviceptr_t *, size_t); +typedef hipError_t (HIP_API_CALL *HIP_HIPMEMFREE) (hipDeviceptr_t); +typedef hipError_t (HIP_API_CALL *HIP_HIPMEMGETINFO) (size_t *, size_t *); +typedef hipError_t (HIP_API_CALL *HIP_HIPMEMCPYDTOD) (hipDeviceptr_t, hipDeviceptr_t, size_t); +typedef hipError_t (HIP_API_CALL *HIP_HIPMEMCPYDTODASYNC) (hipDeviceptr_t, hipDeviceptr_t, size_t, hipStream_t); +typedef hipError_t (HIP_API_CALL *HIP_HIPMEMCPYDTOH) (void *, hipDeviceptr_t, size_t); +typedef hipError_t (HIP_API_CALL *HIP_HIPMEMCPYDTOHASYNC) (void *, hipDeviceptr_t, size_t, hipStream_t); +typedef hipError_t (HIP_API_CALL *HIP_HIPMEMCPYHTOD) (hipDeviceptr_t, const void *, size_t); +typedef hipError_t (HIP_API_CALL *HIP_HIPMEMCPYHTODASYNC) (hipDeviceptr_t, const void *, size_t, hipStream_t); +typedef hipError_t (HIP_API_CALL *HIP_HIPMODULEGETFUNCTION) (hipFunction_t *, hipModule_t, const char *); +typedef hipError_t (HIP_API_CALL *HIP_HIPMODULEGETGLOBAL) (hipDeviceptr_t *, size_t *, hipModule_t, const char *); +typedef hipError_t (HIP_API_CALL *HIP_HIPMODULELOADDATAEX) (hipModule_t *, const void *, 
unsigned int, hipJitOption *, void **); +typedef hipError_t (HIP_API_CALL *HIP_HIPMODULEUNLOAD) (hipModule_t); +typedef hipError_t (HIP_API_CALL *HIP_HIPSTREAMCREATE) (hipStream_t *, unsigned int); +typedef hipError_t (HIP_API_CALL *HIP_HIPSTREAMDESTROY) (hipStream_t); +typedef hipError_t (HIP_API_CALL *HIP_HIPSTREAMSYNCHRONIZE) (hipStream_t); typedef struct hc_hip_lib { @@ -1071,14 +398,9 @@ typedef struct hc_hip_lib HIP_HIPCTXCREATE hipCtxCreate; HIP_HIPCTXDESTROY hipCtxDestroy; - HIP_HIPCTXGETCACHECONFIG hipCtxGetCacheConfig; - HIP_HIPCTXGETCURRENT hipCtxGetCurrent; - HIP_HIPCTXGETSHAREDMEMCONFIG hipCtxGetSharedMemConfig; HIP_HIPCTXPOPCURRENT hipCtxPopCurrent; HIP_HIPCTXPUSHCURRENT hipCtxPushCurrent; - HIP_HIPCTXSETCACHECONFIG hipCtxSetCacheConfig; HIP_HIPCTXSETCURRENT hipCtxSetCurrent; - HIP_HIPCTXSETSHAREDMEMCONFIG hipCtxSetSharedMemConfig; HIP_HIPCTXSYNCHRONIZE hipCtxSynchronize; HIP_HIPDEVICEGETATTRIBUTE hipDeviceGetAttribute; HIP_HIPDEVICEGETCOUNT hipDeviceGetCount; @@ -1089,46 +411,29 @@ typedef struct hc_hip_lib HIP_HIPEVENTCREATE hipEventCreate; HIP_HIPEVENTDESTROY hipEventDestroy; HIP_HIPEVENTELAPSEDTIME hipEventElapsedTime; - HIP_HIPEVENTQUERY hipEventQuery; HIP_HIPEVENTRECORD hipEventRecord; HIP_HIPEVENTSYNCHRONIZE hipEventSynchronize; HIP_HIPFUNCGETATTRIBUTE hipFuncGetAttribute; - HIP_HIPFUNCSETATTRIBUTE hipFuncSetAttribute; - HIP_HIPFUNCSETCACHECONFIG hipFuncSetCacheConfig; - HIP_HIPFUNCSETSHAREDMEMCONFIG hipFuncSetSharedMemConfig; HIP_HIPGETERRORNAME hipGetErrorName; HIP_HIPGETERRORSTRING hipGetErrorString; HIP_HIPINIT hipInit; HIP_HIPLAUNCHKERNEL hipLaunchKernel; HIP_HIPMEMALLOC hipMemAlloc; - HIP_HIPMEMALLOCHOST hipMemAllocHost; + HIP_HIPMEMFREE hipMemFree; + HIP_HIPMEMGETINFO hipMemGetInfo; HIP_HIPMEMCPYDTOD hipMemcpyDtoD; HIP_HIPMEMCPYDTODASYNC hipMemcpyDtoDAsync; HIP_HIPMEMCPYDTOH hipMemcpyDtoH; HIP_HIPMEMCPYDTOHASYNC hipMemcpyDtoHAsync; HIP_HIPMEMCPYHTOD hipMemcpyHtoD; HIP_HIPMEMCPYHTODASYNC hipMemcpyHtoDAsync; - HIP_HIPMEMFREE hipMemFree; - 
HIP_HIPMEMFREEHOST hipMemFreeHost; - HIP_HIPMEMGETINFO hipMemGetInfo; - HIP_HIPMEMSETD32 hipMemsetD32; - HIP_HIPMEMSETD8 hipMemsetD8; HIP_HIPMODULEGETFUNCTION hipModuleGetFunction; HIP_HIPMODULEGETGLOBAL hipModuleGetGlobal; - HIP_HIPMODULELOAD hipModuleLoad; - HIP_HIPMODULELOADDATA hipModuleLoadData; HIP_HIPMODULELOADDATAEX hipModuleLoadDataEx; HIP_HIPMODULEUNLOAD hipModuleUnload; - HIP_HIPPROFILERSTART hipProfilerStart; - HIP_HIPPROFILERSTOP hipProfilerStop; HIP_HIPSTREAMCREATE hipStreamCreate; HIP_HIPSTREAMDESTROY hipStreamDestroy; HIP_HIPSTREAMSYNCHRONIZE hipStreamSynchronize; - HIP_HIPSTREAMWAITEVENT hipStreamWaitEvent; - HIP_HIPLINKCREATE hipLinkCreate; - HIP_HIPLINKADDDATA hipLinkAddData; - HIP_HIPLINKDESTROY hipLinkDestroy; - HIP_HIPLINKCOMPLETE hipLinkComplete; } hc_hip_lib_t; diff --git a/include/ext_hiprtc.h b/include/ext_hiprtc.h index cd1be6c4b..347239c38 100644 --- a/include/ext_hiprtc.h +++ b/include/ext_hiprtc.h @@ -6,41 +6,26 @@ #ifndef _EXT_HIPRTC_H #define _EXT_HIPRTC_H -/** - * from hip_runtime.h (/opt/rocm/hip/include/hip/amd_detail/hiprtc.h) - */ +// start: amd_detail/hiprtc.h -/** - * \ingroup error - * \brief The enumerated type hiprtcResult defines API call result codes. - * HIPRTC API functions return hiprtcResult to indicate the call - * result. 
- */ -typedef enum { - HIPRTC_SUCCESS = 0, - HIPRTC_ERROR_OUT_OF_MEMORY = 1, - HIPRTC_ERROR_PROGRAM_CREATION_FAILURE = 2, - HIPRTC_ERROR_INVALID_INPUT = 3, - HIPRTC_ERROR_INVALID_PROGRAM = 4, - HIPRTC_ERROR_INVALID_OPTION = 5, - HIPRTC_ERROR_COMPILATION = 6, - HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7, - HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8, - HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9, - HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10, - HIPRTC_ERROR_INTERNAL_ERROR = 11 +typedef enum hiprtcResult { + HIPRTC_SUCCESS = 0, + HIPRTC_ERROR_OUT_OF_MEMORY = 1, + HIPRTC_ERROR_PROGRAM_CREATION_FAILURE = 2, + HIPRTC_ERROR_INVALID_INPUT = 3, + HIPRTC_ERROR_INVALID_PROGRAM = 4, + HIPRTC_ERROR_INVALID_OPTION = 5, + HIPRTC_ERROR_COMPILATION = 6, + HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7, + HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8, + HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9, + HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10, + HIPRTC_ERROR_INTERNAL_ERROR = 11 } hiprtcResult; -/** - * \ingroup compilation - * \brief hiprtcProgram is the unit of compilation, and an opaque handle for - * a program. - * - * To compile a CUDA program string, an instance of hiprtcProgram must be - * created first with ::hiprtcCreateProgram, then compiled with - * ::hiprtcCompileProgram. 
- */ -typedef struct _hiprtcProgram *hiprtcProgram; +typedef struct _hiprtcProgram* hiprtcProgram; + +// stop: amd_detail/hiprtc.h #ifdef _WIN32 #define HIPRTCAPI __stdcall @@ -54,13 +39,12 @@ typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCADDNAMEEXPRESSION) (hiprtc typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCCOMPILEPROGRAM) (hiprtcProgram, int, const char * const *); typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCCREATEPROGRAM) (hiprtcProgram *, const char *, const char *, int, const char * const *, const char * const *); typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCDESTROYPROGRAM) (hiprtcProgram *); +typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCGETCODE) (hiprtcProgram, char *); +typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCGETCODESIZE) (hiprtcProgram, size_t *); typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCGETLOWEREDNAME) (hiprtcProgram, const char * const, const char **); -typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCGETPTX) (hiprtcProgram, char *); -typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCGETPTXSIZE) (hiprtcProgram, size_t *); typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCGETPROGRAMLOG) (hiprtcProgram, char *); typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCGETPROGRAMLOGSIZE) (hiprtcProgram, size_t *); -typedef const char * (HIPRTC_API_CALL *HIPRTC_HIPRTCGETERRORSTRING) (hiprtcResult); -typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCVERSION) (int *, int *); +typedef const char * (HIPRTC_API_CALL *HIPRTC_HIPRTCGETERRORSTRING) (hiprtcResult); typedef struct hc_hiprtc_lib { @@ -70,13 +54,12 @@ typedef struct hc_hiprtc_lib HIPRTC_HIPRTCCOMPILEPROGRAM hiprtcCompileProgram; HIPRTC_HIPRTCCREATEPROGRAM hiprtcCreateProgram; HIPRTC_HIPRTCDESTROYPROGRAM hiprtcDestroyProgram; + HIPRTC_HIPRTCGETCODE hiprtcGetCode; + HIPRTC_HIPRTCGETCODESIZE hiprtcGetCodeSize; HIPRTC_HIPRTCGETLOWEREDNAME hiprtcGetLoweredName; - HIPRTC_HIPRTCGETPTX hiprtcGetCode; - HIPRTC_HIPRTCGETPTXSIZE hiprtcGetCodeSize; 
HIPRTC_HIPRTCGETPROGRAMLOG hiprtcGetProgramLog; HIPRTC_HIPRTCGETPROGRAMLOGSIZE hiprtcGetProgramLogSize; HIPRTC_HIPRTCGETERRORSTRING hiprtcGetErrorString; - HIPRTC_HIPRTCVERSION hiprtcVersion; } hc_hiprtc_lib_t; diff --git a/include/filehandling.h b/include/filehandling.h index 1d13097e4..db03ac456 100644 --- a/include/filehandling.h +++ b/include/filehandling.h @@ -15,8 +15,8 @@ int _wopen (const char *path, int oflag, ...); #endif -bool hc_fopen (HCFILE *fp, const char *path, char *mode); -bool hc_fopen_raw (HCFILE *fp, const char *path, char *mode); +bool hc_fopen (HCFILE *fp, const char *path, const char *mode); +bool hc_fopen_raw (HCFILE *fp, const char *path, const char *mode); int hc_fscanf (HCFILE *fp, const char *format, void *ptr); int hc_fprintf (HCFILE *fp, const char *format, ...); int hc_vfprintf (HCFILE *fp, const char *format, va_list ap); diff --git a/include/types.h b/include/types.h index ed22a95ee..cb4f10f44 100644 --- a/include/types.h +++ b/include/types.h @@ -1075,7 +1075,7 @@ typedef struct hc_fp bool is_zip; int bom_size; - char *mode; + const char *mode; const char *path; } HCFILE; @@ -1502,80 +1502,80 @@ typedef struct hc_device_param int hip_warp_size; - HIPdevice hip_device; - HIPcontext hip_context; - HIPstream hip_stream; + hipDevice_t hip_device; + hipCtx_t hip_context; + hipStream_t hip_stream; - HIPevent hip_event1; - HIPevent hip_event2; + hipEvent_t hip_event1; + hipEvent_t hip_event2; - HIPmodule hip_module; - HIPmodule hip_module_shared; - HIPmodule hip_module_mp; - HIPmodule hip_module_amp; + hipModule_t hip_module; + hipModule_t hip_module_shared; + hipModule_t hip_module_mp; + hipModule_t hip_module_amp; - HIPfunction hip_function1; - HIPfunction hip_function12; - HIPfunction hip_function2p; - HIPfunction hip_function2; - HIPfunction hip_function2e; - HIPfunction hip_function23; - HIPfunction hip_function3; - HIPfunction hip_function4; - HIPfunction hip_function_init2; - HIPfunction hip_function_loop2p; - HIPfunction 
hip_function_loop2; - HIPfunction hip_function_mp; - HIPfunction hip_function_mp_l; - HIPfunction hip_function_mp_r; - HIPfunction hip_function_amp; - HIPfunction hip_function_tm; - HIPfunction hip_function_memset; - HIPfunction hip_function_bzero; - HIPfunction hip_function_atinit; - HIPfunction hip_function_utf8toutf16le; - HIPfunction hip_function_decompress; - HIPfunction hip_function_aux1; - HIPfunction hip_function_aux2; - HIPfunction hip_function_aux3; - HIPfunction hip_function_aux4; + hipFunction_t hip_function1; + hipFunction_t hip_function12; + hipFunction_t hip_function2p; + hipFunction_t hip_function2; + hipFunction_t hip_function2e; + hipFunction_t hip_function23; + hipFunction_t hip_function3; + hipFunction_t hip_function4; + hipFunction_t hip_function_init2; + hipFunction_t hip_function_loop2p; + hipFunction_t hip_function_loop2; + hipFunction_t hip_function_mp; + hipFunction_t hip_function_mp_l; + hipFunction_t hip_function_mp_r; + hipFunction_t hip_function_amp; + hipFunction_t hip_function_tm; + hipFunction_t hip_function_memset; + hipFunction_t hip_function_bzero; + hipFunction_t hip_function_atinit; + hipFunction_t hip_function_utf8toutf16le; + hipFunction_t hip_function_decompress; + hipFunction_t hip_function_aux1; + hipFunction_t hip_function_aux2; + hipFunction_t hip_function_aux3; + hipFunction_t hip_function_aux4; - HIPdeviceptr hip_d_pws_buf; - HIPdeviceptr hip_d_pws_amp_buf; - HIPdeviceptr hip_d_pws_comp_buf; - HIPdeviceptr hip_d_pws_idx; - HIPdeviceptr hip_d_rules; - HIPdeviceptr hip_d_rules_c; - HIPdeviceptr hip_d_combs; - HIPdeviceptr hip_d_combs_c; - HIPdeviceptr hip_d_bfs; - HIPdeviceptr hip_d_bfs_c; - HIPdeviceptr hip_d_tm_c; - HIPdeviceptr hip_d_bitmap_s1_a; - HIPdeviceptr hip_d_bitmap_s1_b; - HIPdeviceptr hip_d_bitmap_s1_c; - HIPdeviceptr hip_d_bitmap_s1_d; - HIPdeviceptr hip_d_bitmap_s2_a; - HIPdeviceptr hip_d_bitmap_s2_b; - HIPdeviceptr hip_d_bitmap_s2_c; - HIPdeviceptr hip_d_bitmap_s2_d; - HIPdeviceptr hip_d_plain_bufs; - 
HIPdeviceptr hip_d_digests_buf; - HIPdeviceptr hip_d_digests_shown; - HIPdeviceptr hip_d_salt_bufs; - HIPdeviceptr hip_d_esalt_bufs; - HIPdeviceptr hip_d_tmps; - HIPdeviceptr hip_d_hooks; - HIPdeviceptr hip_d_result; - HIPdeviceptr hip_d_extra0_buf; - HIPdeviceptr hip_d_extra1_buf; - HIPdeviceptr hip_d_extra2_buf; - HIPdeviceptr hip_d_extra3_buf; - HIPdeviceptr hip_d_root_css_buf; - HIPdeviceptr hip_d_markov_css_buf; - HIPdeviceptr hip_d_st_digests_buf; - HIPdeviceptr hip_d_st_salts_buf; - HIPdeviceptr hip_d_st_esalts_buf; + hipDeviceptr_t hip_d_pws_buf; + hipDeviceptr_t hip_d_pws_amp_buf; + hipDeviceptr_t hip_d_pws_comp_buf; + hipDeviceptr_t hip_d_pws_idx; + hipDeviceptr_t hip_d_rules; + hipDeviceptr_t hip_d_rules_c; + hipDeviceptr_t hip_d_combs; + hipDeviceptr_t hip_d_combs_c; + hipDeviceptr_t hip_d_bfs; + hipDeviceptr_t hip_d_bfs_c; + hipDeviceptr_t hip_d_tm_c; + hipDeviceptr_t hip_d_bitmap_s1_a; + hipDeviceptr_t hip_d_bitmap_s1_b; + hipDeviceptr_t hip_d_bitmap_s1_c; + hipDeviceptr_t hip_d_bitmap_s1_d; + hipDeviceptr_t hip_d_bitmap_s2_a; + hipDeviceptr_t hip_d_bitmap_s2_b; + hipDeviceptr_t hip_d_bitmap_s2_c; + hipDeviceptr_t hip_d_bitmap_s2_d; + hipDeviceptr_t hip_d_plain_bufs; + hipDeviceptr_t hip_d_digests_buf; + hipDeviceptr_t hip_d_digests_shown; + hipDeviceptr_t hip_d_salt_bufs; + hipDeviceptr_t hip_d_esalt_bufs; + hipDeviceptr_t hip_d_tmps; + hipDeviceptr_t hip_d_hooks; + hipDeviceptr_t hip_d_result; + hipDeviceptr_t hip_d_extra0_buf; + hipDeviceptr_t hip_d_extra1_buf; + hipDeviceptr_t hip_d_extra2_buf; + hipDeviceptr_t hip_d_extra3_buf; + hipDeviceptr_t hip_d_root_css_buf; + hipDeviceptr_t hip_d_markov_css_buf; + hipDeviceptr_t hip_d_st_digests_buf; + hipDeviceptr_t hip_d_st_salts_buf; + hipDeviceptr_t hip_d_st_esalts_buf; // API: opencl @@ -1726,8 +1726,7 @@ typedef struct backend_ctx int rc_hip_init; int rc_hiprtc_init; - int hiprtc_driver_version; - int hip_driver_version; + int hip_driverVersion; // opencl @@ -1799,8 +1798,6 @@ typedef struct 
hwmon_ctx hm_attrs_t *hm_device; - ADLOD6MemClockState *od_clock_mem_status; - } hwmon_ctx_t; #if defined (__APPLE__) diff --git a/src/backend.c b/src/backend.c index a41012cd6..05cdeeac4 100644 --- a/src/backend.c +++ b/src/backend.c @@ -26,10 +26,10 @@ #include "terminal.h" #if defined (__linux__) -static const char *dri_card0_path = "/dev/dri/card0"; +static const char *const dri_card0_path = "/dev/dri/card0"; -static const char *drm_card0_vendor_path = "/sys/class/drm/card0/device/vendor"; -static const char *drm_card0_driver_path = "/sys/class/drm/card0/device/driver"; +static const char *const drm_card0_vendor_path = "/sys/class/drm/card0/device/vendor"; +static const char *const drm_card0_driver_path = "/sys/class/drm/card0/device/driver"; #endif static const u32 full01 = 0x01010101; @@ -980,11 +980,11 @@ int hiprtc_init (hashcat_ctx_t *hashcat_ctx) memset (hiprtc, 0, sizeof (HIPRTC_PTR)); #if defined (_WIN) - hiprtc->lib = hc_dlopen ("fixme.dll"); + hiprtc->lib = hc_dlopen ("amdhip64.dll"); #elif defined (__APPLE__) hiprtc->lib = hc_dlopen ("fixme.dylib"); #elif defined (__CYGWIN__) - hiprtc->lib = hc_dlopen ("fixme.dll"); + hiprtc->lib = hc_dlopen ("amdhip64.dll"); #else hiprtc->lib = hc_dlopen ("libamdhip64.so"); @@ -998,12 +998,11 @@ int hiprtc_init (hashcat_ctx_t *hashcat_ctx) HC_LOAD_FUNC (hiprtc, hiprtcCreateProgram, HIPRTC_HIPRTCCREATEPROGRAM, HIPRTC, 1); HC_LOAD_FUNC (hiprtc, hiprtcDestroyProgram, HIPRTC_HIPRTCDESTROYPROGRAM, HIPRTC, 1); HC_LOAD_FUNC (hiprtc, hiprtcGetLoweredName, HIPRTC_HIPRTCGETLOWEREDNAME, HIPRTC, 1); - HC_LOAD_FUNC (hiprtc, hiprtcGetCode, HIPRTC_HIPRTCGETPTX, HIPRTC, 1); - HC_LOAD_FUNC (hiprtc, hiprtcGetCodeSize, HIPRTC_HIPRTCGETPTXSIZE, HIPRTC, 1); + HC_LOAD_FUNC (hiprtc, hiprtcGetCode, HIPRTC_HIPRTCGETCODE, HIPRTC, 1); + HC_LOAD_FUNC (hiprtc, hiprtcGetCodeSize, HIPRTC_HIPRTCGETCODESIZE, HIPRTC, 1); HC_LOAD_FUNC (hiprtc, hiprtcGetProgramLog, HIPRTC_HIPRTCGETPROGRAMLOG, HIPRTC, 1); HC_LOAD_FUNC (hiprtc, hiprtcGetProgramLogSize, 
HIPRTC_HIPRTCGETPROGRAMLOGSIZE, HIPRTC, 1); HC_LOAD_FUNC (hiprtc, hiprtcGetErrorString, HIPRTC_HIPRTCGETERRORSTRING, HIPRTC, 1); - HC_LOAD_FUNC (hiprtc, hiprtcVersion, HIPRTC_HIPRTCVERSION, HIPRTC, 1); return 0; } @@ -1069,11 +1068,6 @@ int hc_hiprtcCompileProgram (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, int HIPRTC_PTR *hiprtc = (HIPRTC_PTR *) backend_ctx->hiprtc; - #if 0 - for(int i =0; i< numOptions; i++) - printf("Option_%d = %s\n", i, options[i]); - #endif - const hiprtcResult HIPRTC_err = hiprtc->hiprtcCompileProgram (prog, numOptions, options); if (HIPRTC_err != HIPRTC_SUCCESS) @@ -1122,13 +1116,13 @@ int hc_hiprtcGetProgramLog (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, char return 0; } -int hc_hiprtcGetCodeSize (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, size_t *ptxSizeRet) +int hc_hiprtcGetCodeSize (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, size_t *codeSizeRet) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; HIPRTC_PTR *hiprtc = (HIPRTC_PTR *) backend_ctx->hiprtc; - const hiprtcResult HIPRTC_err = hiprtc->hiprtcGetCodeSize (prog, ptxSizeRet); + const hiprtcResult HIPRTC_err = hiprtc->hiprtcGetCodeSize (prog, codeSizeRet); if (HIPRTC_err != HIPRTC_SUCCESS) { @@ -1140,13 +1134,13 @@ int hc_hiprtcGetCodeSize (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, size_t return 0; } -int hc_hiprtcGetCode (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, char *ptx) +int hc_hiprtcGetCode (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, char *code) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; HIPRTC_PTR *hiprtc = (HIPRTC_PTR *) backend_ctx->hiprtc; - const hiprtcResult HIPRTC_err = hiprtc->hiprtcGetCode (prog, ptx); + const hiprtcResult HIPRTC_err = hiprtc->hiprtcGetCode (prog, code); if (HIPRTC_err != HIPRTC_SUCCESS) { @@ -1158,24 +1152,6 @@ int hc_hiprtcGetCode (hashcat_ctx_t *hashcat_ctx, hiprtcProgram prog, char *ptx) return 0; } -int hc_hiprtcVersion (hashcat_ctx_t *hashcat_ctx, int *major, int *minor) -{ - backend_ctx_t 
*backend_ctx = hashcat_ctx->backend_ctx; - - HIPRTC_PTR *hiprtc = (HIPRTC_PTR *) backend_ctx->hiprtc; - - const hiprtcResult HIPRTC_err = hiprtc->hiprtcVersion (major, minor); - - if (HIPRTC_err != HIPRTC_SUCCESS) - { - event_log_error (hashcat_ctx, "hiprtcVersion(): %s", hiprtc->hiprtcGetErrorString (HIPRTC_err)); - - return -1; - } - - return 0; -} - // CUDA int cuda_init (hashcat_ctx_t *hashcat_ctx) @@ -2478,11 +2454,11 @@ int hip_init (hashcat_ctx_t *hashcat_ctx) memset (hip, 0, sizeof (HIP_PTR)); #if defined (_WIN) - hip->lib = hc_dlopen ("fixme.dll"); + hip->lib = hc_dlopen ("amdhip64.dll"); #elif defined (__APPLE__) hip->lib = hc_dlopen ("fixme.dylib"); #elif defined (__CYGWIN__) - hip->lib = hc_dlopen ("fixme.dll"); + hip->lib = hc_dlopen ("amdhip64.dll"); #else hip->lib = hc_dlopen ("libamdhip64.so"); @@ -2516,67 +2492,42 @@ int hip_init (hashcat_ctx_t *hashcat_ctx) HC_LOAD_FUNC_HIP (hip, hipCtxCreate, hipCtxCreate, HIP_HIPCTXCREATE, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipCtxDestroy, hipCtxDestroy, HIP_HIPCTXDESTROY, HIP, 1); - //HC_LOAD_FUNC_HIP (hip, hipCtxGetCacheConfig, hipCtxGetCacheConfig, HIP_HIPCTXGETCACHECONFIG, HIP, 1); - //HC_LOAD_FUNC_HIP (hip, hipCtxGetCurrent, hipCtxGetCurrent, HIP_HIPCTXGETCURRENT, HIP, 1); - //HC_LOAD_FUNC_HIP (hip, hipCtxGetSharedMemConfig, hipCtxGetSharedMemConfig, HIP_HIPCTXGETSHAREDMEMCONFIG, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipCtxPopCurrent, hipCtxPopCurrent, HIP_HIPCTXPOPCURRENT, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipCtxPushCurrent, hipCtxPushCurrent, HIP_HIPCTXPUSHCURRENT, HIP, 1); - HC_LOAD_FUNC_HIP (hip, hipCtxSetCacheConfig, hipCtxSetCacheConfig, HIP_HIPCTXSETCACHECONFIG, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipCtxSetCurrent, hipCtxSetCurrent, HIP_HIPCTXSETCURRENT, HIP, 1); - //HC_LOAD_FUNC_HIP (hip, hipCtxSetSharedMemConfig, hipCtxSetSharedMemConfig, HIP_HIPCTXSETSHAREDMEMCONFIG, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipCtxSynchronize, hipCtxSynchronize, HIP_HIPCTXSYNCHRONIZE, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipDeviceGet, 
hipDeviceGet, HIP_HIPDEVICEGET, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipDeviceGetAttribute, hipDeviceGetAttribute, HIP_HIPDEVICEGETATTRIBUTE, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipDeviceGetCount, hipGetDeviceCount, HIP_HIPDEVICEGETCOUNT, HIP, 1); - HC_LOAD_FUNC_HIP (hip, hipDeviceGet, hipDeviceGet, HIP_HIPDEVICEGET, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipDeviceGetName, hipDeviceGetName, HIP_HIPDEVICEGETNAME, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipDeviceTotalMem, hipDeviceTotalMem, HIP_HIPDEVICETOTALMEM, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipDriverGetVersion, hipDriverGetVersion, HIP_HIPDRIVERGETVERSION, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipEventCreate, hipEventCreateWithFlags, HIP_HIPEVENTCREATE, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipEventDestroy, hipEventDestroy, HIP_HIPEVENTDESTROY, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipEventElapsedTime, hipEventElapsedTime, HIP_HIPEVENTELAPSEDTIME, HIP, 1); - HC_LOAD_FUNC_HIP (hip, hipEventQuery, hipEventQuery, HIP_HIPEVENTQUERY, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipEventRecord, hipEventRecord, HIP_HIPEVENTRECORD, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipEventSynchronize, hipEventSynchronize, HIP_HIPEVENTSYNCHRONIZE, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipFuncGetAttribute, hipFuncGetAttribute, HIP_HIPFUNCGETATTRIBUTE, HIP, 1); - //HC_LOAD_FUNC_HIP (hip, hipFuncSetAttribute, hipFuncSetAttribute, HIP_HIPFUNCSETATTRIBUTE, HIP, 1); - //HC_LOAD_FUNC_HIP (hip, hipFuncSetCacheConfig, hipFuncSetCacheConfig, HIP_HIPFUNCSETCACHECONFIG, HIP, 1); - //HC_LOAD_FUNC_HIP (hip, hipFuncSetSharedMemConfig, hipFuncSetSharedMemConfig, HIP_HIPFUNCSETSHAREDMEMCONFIG, HIP, 1); - //HC_LOAD_FUNC_HIP (hip, hipGetErrorName, hipGetErrorName, HIP_HIPGETERRORNAME, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipGetErrorName, hipGetErrorName, HIP_HIPGETERRORNAME, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipGetErrorString, hipGetErrorString, HIP_HIPGETERRORSTRING, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipInit, hipInit, HIP_HIPINIT, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipLaunchKernel, hipModuleLaunchKernel, HIP_HIPLAUNCHKERNEL, 
HIP, 1); HC_LOAD_FUNC_HIP (hip, hipMemAlloc, hipMalloc, HIP_HIPMEMALLOC, HIP, 1); - HC_LOAD_FUNC_HIP (hip, hipMemAllocHost, hipMemAllocHost, HIP_HIPMEMALLOCHOST, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipMemFree, hipFree, HIP_HIPMEMFREE, HIP, 1); + HC_LOAD_FUNC_HIP (hip, hipMemGetInfo, hipMemGetInfo, HIP_HIPMEMGETINFO, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoD, hipMemcpyDtoD, HIP_HIPMEMCPYDTOD, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoDAsync, hipMemcpyDtoDAsync, HIP_HIPMEMCPYDTODASYNC, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoH, hipMemcpyDtoH, HIP_HIPMEMCPYDTOH, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoHAsync, hipMemcpyDtoHAsync, HIP_HIPMEMCPYDTOHASYNC, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipMemcpyHtoD, hipMemcpyHtoD, HIP_HIPMEMCPYHTOD, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipMemcpyHtoDAsync, hipMemcpyHtoDAsync, HIP_HIPMEMCPYHTODASYNC, HIP, 1); - HC_LOAD_FUNC_HIP (hip, hipMemFree, hipFree, HIP_HIPMEMFREE, HIP, 1); - HC_LOAD_FUNC_HIP (hip, hipMemFreeHost, hipFreeHost, HIP_HIPMEMFREEHOST, HIP, 1); - HC_LOAD_FUNC_HIP (hip, hipMemGetInfo, hipMemGetInfo, HIP_HIPMEMGETINFO, HIP, 1); - //HC_LOAD_FUNC_HIP (hip, hipMemsetD32, hipMemsetD32, HIP_HIPMEMSETD32, HIP, 1); - //HC_LOAD_FUNC_HIP (hip, hipMemsetD8, hipMemsetD8, HIP_HIPMEMSETD8, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipModuleGetFunction, hipModuleGetFunction, HIP_HIPMODULEGETFUNCTION, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipModuleGetGlobal, hipModuleGetGlobal, HIP_HIPMODULEGETGLOBAL, HIP, 1); - HC_LOAD_FUNC_HIP (hip, hipModuleLoad, hipModuleLoad, HIP_HIPMODULELOAD, HIP, 1); - HC_LOAD_FUNC_HIP (hip, hipModuleLoadData, hipModuleLoadData, HIP_HIPMODULELOADDATA, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipModuleLoadDataEx, hipModuleLoadDataEx, HIP_HIPMODULELOADDATAEX, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipModuleUnload, hipModuleUnload, HIP_HIPMODULEUNLOAD, HIP, 1); - //HC_LOAD_FUNC_HIP (hip, hipProfilerStart, hipProfilerStart, HIP_HIPPROFILERSTART, HIP, 1); - //HC_LOAD_FUNC_HIP (hip, hipProfilerStop, hipProfilerStop, HIP_HIPPROFILERSTOP, HIP, 
1); HC_LOAD_FUNC_HIP (hip, hipStreamCreate, hipStreamCreate, HIP_HIPSTREAMCREATE, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipStreamDestroy, hipStreamDestroy, HIP_HIPSTREAMDESTROY, HIP, 1); HC_LOAD_FUNC_HIP (hip, hipStreamSynchronize, hipStreamSynchronize, HIP_HIPSTREAMSYNCHRONIZE, HIP, 1); - HC_LOAD_FUNC_HIP (hip, hipStreamWaitEvent, hipStreamWaitEvent, HIP_HIPSTREAMWAITEVENT, HIP, 1); - //TODO HIP? - #if defined (WITH_CUBINX) - HC_LOAD_FUNC_HIP (hip, hipLinkCreate, hipLinkCreate, HIP_HIPLINKCREATE, HIP, 1); - HC_LOAD_FUNC_HIP (hip, hipLinkAddData, hipLinkAddData, HIP_HIPLINKADDDATA, HIP, 1); - HC_LOAD_FUNC_HIP (hip, hipLinkDestroy, hipLinkDestroy, HIP_HIPLINKDESTROY, HIP, 1); - HC_LOAD_FUNC_HIP (hip, hipLinkComplete, hipLinkComplete, HIP_HIPLINKCOMPLETE, HIP, 1); - #endif return 0; } @@ -2600,25 +2551,25 @@ void hip_close (hashcat_ctx_t *hashcat_ctx) } } -int hc_hipInit (hashcat_ctx_t *hashcat_ctx, unsigned int Flags) +int hc_hipCtxCreate (hashcat_ctx_t *hashcat_ctx, hipCtx_t *pctx, unsigned int flags, hipDevice_t dev) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - const HIPresult HIP_err = hip->hipInit (Flags); + const hipError_t HIP_err = hip->hipCtxCreate (pctx, flags, dev); - if (HIP_err != HIP_SUCCESS) + if (HIP_err != hipSuccess) { const char *pStr = NULL; - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) { - event_log_error (hashcat_ctx, "hipInit(): %s", pStr); + event_log_error (hashcat_ctx, "hipCtxCreate(): %s", pStr); } else { - event_log_error (hashcat_ctx, "hipInit(): %d", HIP_err); + event_log_error (hashcat_ctx, "hipCtxCreate(): %d", HIP_err); } return -1; @@ -2627,19 +2578,181 @@ int hc_hipInit (hashcat_ctx_t *hashcat_ctx, unsigned int Flags) return 0; } -int hc_hipDeviceGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, HIPdevice_attribute attrib, HIPdevice dev) +int hc_hipCtxDestroy (hashcat_ctx_t *hashcat_ctx, hipCtx_t ctx) { 
backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - const HIPresult HIP_err = hip->hipDeviceGetAttribute (pi, attrib, dev); + const hipError_t HIP_err = hip->hipCtxDestroy (ctx); - if (HIP_err != HIP_SUCCESS) + if (HIP_err != hipSuccess) { const char *pStr = NULL; - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) + { + event_log_error (hashcat_ctx, "hipCtxDestroy(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipCtxDestroy(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipCtxPopCurrent (hashcat_ctx_t *hashcat_ctx, hipCtx_t *pctx) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const hipError_t HIP_err = hip->hipCtxPopCurrent (pctx); + + if (HIP_err != hipSuccess) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) + { + event_log_error (hashcat_ctx, "hipCtxPopCurrent(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipCtxPopCurrent(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipCtxPushCurrent (hashcat_ctx_t *hashcat_ctx, hipCtx_t ctx) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const hipError_t HIP_err = hip->hipCtxPushCurrent (ctx); + + if (HIP_err != hipSuccess) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) + { + event_log_error (hashcat_ctx, "hipCtxPushCurrent(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipCtxPushCurrent(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipCtxSetCurrent (hashcat_ctx_t *hashcat_ctx, hipCtx_t ctx) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const hipError_t HIP_err = hip->hipCtxSetCurrent (ctx); + + if (HIP_err 
!= hipSuccess) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) + { + event_log_error (hashcat_ctx, "hipCtxSetCurrent(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipCtxSetCurrent(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipCtxSynchronize (hashcat_ctx_t *hashcat_ctx) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const hipError_t HIP_err = hip->hipCtxSynchronize (); + + if (HIP_err != hipSuccess) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) + { + event_log_error (hashcat_ctx, "hipCtxSynchronize(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipCtxSynchronize(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipDeviceGet (hashcat_ctx_t *hashcat_ctx, hipDevice_t* device, int ordinal) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const hipError_t HIP_err = hip->hipDeviceGet (device, ordinal); + + if (HIP_err != hipSuccess) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) + { + event_log_error (hashcat_ctx, "hipDeviceGet(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipDeviceGet(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipDeviceGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, hipDeviceAttribute_t attrib, hipDevice_t dev) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const hipError_t HIP_err = hip->hipDeviceGetAttribute (pi, attrib, dev); + + if (HIP_err != hipSuccess) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) { event_log_error (hashcat_ctx, "hipDeviceGetAttribute(): %s", pStr); } @@ -2660,13 +2773,13 @@ int hc_hipDeviceGetCount (hashcat_ctx_t *hashcat_ctx, int 
*count) HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - const HIPresult HIP_err = hip->hipDeviceGetCount (count); + const hipError_t HIP_err = hip->hipDeviceGetCount (count); - if (HIP_err != HIP_SUCCESS) + if (HIP_err != hipSuccess) { const char *pStr = NULL; - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) { event_log_error (hashcat_ctx, "hipDeviceGetCount(): %s", pStr); } @@ -2681,46 +2794,19 @@ int hc_hipDeviceGetCount (hashcat_ctx_t *hashcat_ctx, int *count) return 0; } -int hc_hipDeviceGet (hashcat_ctx_t *hashcat_ctx, HIPdevice* device, int ordinal) +int hc_hipDeviceGetName (hashcat_ctx_t *hashcat_ctx, char *name, int len, hipDevice_t dev) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - const HIPresult HIP_err = hip->hipDeviceGet (device, ordinal); + const hipError_t HIP_err = hip->hipDeviceGetName (name, len, dev); - if (HIP_err != HIP_SUCCESS) + if (HIP_err != hipSuccess) { const char *pStr = NULL; - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) - { - event_log_error (hashcat_ctx, "hipDeviceGet(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "hipDeviceGet(): %d", HIP_err); - } - - return -1; - } - - return 0; -} - -int hc_hipDeviceGetName (hashcat_ctx_t *hashcat_ctx, char *name, int len, HIPdevice dev) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - - const HIPresult HIP_err = hip->hipDeviceGetName (name, len, dev); - - if (HIP_err != HIP_SUCCESS) - { - const char *pStr = NULL; - - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) { event_log_error (hashcat_ctx, "hipDeviceGetName(): %s", pStr); } @@ -2735,19 +2821,19 @@ int hc_hipDeviceGetName (hashcat_ctx_t *hashcat_ctx, char *name, int len, HIPdev return 0; } -int hc_hipDeviceTotalMem (hashcat_ctx_t *hashcat_ctx, size_t 
*bytes, HIPdevice dev) +int hc_hipDeviceTotalMem (hashcat_ctx_t *hashcat_ctx, size_t *bytes, hipDevice_t dev) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - const HIPresult HIP_err = hip->hipDeviceTotalMem (bytes, dev); + const hipError_t HIP_err = hip->hipDeviceTotalMem (bytes, dev); - if (HIP_err != HIP_SUCCESS) + if (HIP_err != hipSuccess) { const char *pStr = NULL; - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) { event_log_error (hashcat_ctx, "hipDeviceTotalMem(): %s", pStr); } @@ -2768,13 +2854,13 @@ int hc_hipDriverGetVersion (hashcat_ctx_t *hashcat_ctx, int *driverVersion) HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - const HIPresult HIP_err = hip->hipDriverGetVersion (driverVersion); + const hipError_t HIP_err = hip->hipDriverGetVersion (driverVersion); - if (HIP_err != HIP_SUCCESS) + if (HIP_err != hipSuccess) { const char *pStr = NULL; - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) { event_log_error (hashcat_ctx, "hipDriverGetVersion(): %s", pStr); } @@ -2789,645 +2875,19 @@ int hc_hipDriverGetVersion (hashcat_ctx_t *hashcat_ctx, int *driverVersion) return 0; } -int hc_hipCtxCreate (hashcat_ctx_t *hashcat_ctx, HIPcontext *pctx, unsigned int flags, HIPdevice dev) +int hc_hipEventCreate (hashcat_ctx_t *hashcat_ctx, hipEvent_t *phEvent, unsigned int Flags) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - const HIPresult HIP_err = hip->hipCtxCreate (pctx, flags, dev); + const hipError_t HIP_err = hip->hipEventCreate (phEvent, Flags); - if (HIP_err != HIP_SUCCESS) + if (HIP_err != hipSuccess) { const char *pStr = NULL; - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) - { - event_log_error (hashcat_ctx, "hipCtxCreate(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, 
"hipCtxCreate(): %d", HIP_err); - } - - return -1; - } - - return 0; -} - -int hc_hipCtxDestroy (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - - const HIPresult HIP_err = hip->hipCtxDestroy (ctx); - - if (HIP_err != HIP_SUCCESS) - { - const char *pStr = NULL; - - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) - { - event_log_error (hashcat_ctx, "hipCtxDestroy(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "hipCtxDestroy(): %d", HIP_err); - } - - return -1; - } - - return 0; -} - -int hc_hipModuleLoadDataEx (hashcat_ctx_t *hashcat_ctx, HIPmodule *module, const void *image, unsigned int numOptions, HIPjit_option *options, void **optionValues) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - - const HIPresult HIP_err = hip->hipModuleLoadDataEx (module, image, numOptions, options, optionValues); - - if (HIP_err != HIP_SUCCESS) - { - const char *pStr = NULL; - - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) - { - event_log_error (hashcat_ctx, "hipModuleLoadDataEx(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "hipModuleLoadDataEx(): %d", HIP_err); - } - - return -1; - } - - return 0; -} - -int hc_hipModuleUnload (hashcat_ctx_t *hashcat_ctx, HIPmodule hmod) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - - const HIPresult HIP_err = hip->hipModuleUnload (hmod); - - if (HIP_err != HIP_SUCCESS) - { - const char *pStr = NULL; - - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) - { - event_log_error (hashcat_ctx, "hipModuleUnload(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "hipModuleUnload(): %d", HIP_err); - } - - return -1; - } - - return 0; -} - -int hc_hipCtxSetCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx) -{ - backend_ctx_t *backend_ctx = 
hashcat_ctx->backend_ctx; - - HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - - const HIPresult HIP_err = hip->hipCtxSetCurrent (ctx); - - if (HIP_err != HIP_SUCCESS) - { - const char *pStr = NULL; - - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) - { - event_log_error (hashcat_ctx, "hipCtxSetCurrent(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "hipCtxSetCurrent(): %d", HIP_err); - } - - return -1; - } - - return 0; -} - -int hc_hipMemAlloc (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr *dptr, size_t bytesize) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - - const HIPresult HIP_err = hip->hipMemAlloc (dptr, bytesize); - - if (HIP_err != HIP_SUCCESS) - { - const char *pStr = NULL; - - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) - { - event_log_error (hashcat_ctx, "hipMemAlloc(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "hipMemAlloc(): %d", HIP_err); - } - - return -1; - } - - return 0; -} - -int hc_hipMemFree (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dptr) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - - const HIPresult HIP_err = hip->hipMemFree (dptr); - - if (HIP_err != HIP_SUCCESS) - { - const char *pStr = NULL; - - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) - { - event_log_error (hashcat_ctx, "hipMemFree(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "hipMemFree(): %d", HIP_err); - } - - return -1; - } - - return 0; -} - -int hc_hipMemcpyDtoH (hashcat_ctx_t *hashcat_ctx, void *dstHost, HIPdeviceptr srcDevice, size_t ByteCount) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - - const HIPresult HIP_err = hip->hipMemcpyDtoH (dstHost, srcDevice, ByteCount); - - if (HIP_err != HIP_SUCCESS) - { - const char *pStr = NULL; - - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) - { - 
event_log_error (hashcat_ctx, "hipMemcpyDtoH(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "hipMemcpyDtoH(): %d", HIP_err); - } - - return -1; - } - - return 0; -} - -int hc_hipMemcpyDtoHAsync (hashcat_ctx_t *hashcat_ctx, void *dstHost, HIPdeviceptr srcDevice, size_t ByteCount, HIPstream hStream) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - - const HIPresult HIP_err = hip->hipMemcpyDtoHAsync (dstHost, srcDevice, ByteCount, hStream); - - if (HIP_err != HIP_SUCCESS) - { - const char *pStr = NULL; - - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) - { - event_log_error (hashcat_ctx, "hipMemcpyDtoHAsync(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "hipMemcpyDtoHAsync(): %d", HIP_err); - } - - return -1; - } - - return 0; -} - -int hc_hipMemcpyDtoD (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, HIPdeviceptr srcDevice, size_t ByteCount) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - - const HIPresult HIP_err = hip->hipMemcpyDtoD (dstDevice, srcDevice, ByteCount); - - if (HIP_err != HIP_SUCCESS) - { - const char *pStr = NULL; - - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) - { - event_log_error (hashcat_ctx, "hipMemcpyDtoD(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "hipMemcpyDtoD(): %d", HIP_err); - } - - return -1; - } - - return 0; -} - -int hc_hipMemcpyDtoDAsync (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, HIPdeviceptr srcDevice, size_t ByteCount, HIPstream hStream) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - - const HIPresult HIP_err = hip->hipMemcpyDtoDAsync (dstDevice, srcDevice, ByteCount, hStream); - - if (HIP_err != HIP_SUCCESS) - { - const char *pStr = NULL; - - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) - { - event_log_error (hashcat_ctx, 
"hipMemcpyDtoDAsync(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "hipMemcpyDtoDAsync(): %d", HIP_err); - } - - return -1; - } - - return 0; -} - -int hc_hipMemcpyHtoD (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, const void *srcHost, size_t ByteCount) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - - const HIPresult HIP_err = hip->hipMemcpyHtoD (dstDevice, srcHost, ByteCount); - - if (HIP_err != HIP_SUCCESS) - { - const char *pStr = NULL; - - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) - { - event_log_error (hashcat_ctx, "hipMemcpyHtoD(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "hipMemcpyHtoD(): %d", HIP_err); - } - - return -1; - } - - return 0; -} - -int hc_hipMemcpyHtoDAsync (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, const void *srcHost, size_t ByteCount, HIPstream hStream) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - - const HIPresult HIP_err = hip->hipMemcpyHtoDAsync (dstDevice, srcHost, ByteCount, hStream); - - if (HIP_err != HIP_SUCCESS) - { - const char *pStr = NULL; - - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) - { - event_log_error (hashcat_ctx, "hipMemcpyHtoDAsync(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "hipMemcpyHtoDAsync(): %d", HIP_err); - } - - return -1; - } - - return 0; -} - -int hc_hipModuleGetFunction (hashcat_ctx_t *hashcat_ctx, HIPfunction *hfunc, HIPmodule hmod, const char *name) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - - const HIPresult HIP_err = hip->hipModuleGetFunction (hfunc, hmod, name); - - if (HIP_err != HIP_SUCCESS) - { - const char *pStr = NULL; - - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) - { - event_log_error (hashcat_ctx, "hipModuleGetFunction(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, 
"hipModuleGetFunction(): %d", HIP_err); - } - - return -1; - } - - return 0; -} - -int hc_hipModuleGetGlobal (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr *dptr, size_t *bytes, HIPmodule hmod, const char *name) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - - const HIPresult HIP_err = hip->hipModuleGetGlobal (dptr, bytes, hmod, name); - - if (HIP_err != HIP_SUCCESS) - { - const char *pStr = NULL; - - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) - { - event_log_error (hashcat_ctx, "hipModuleGetGlobal(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "hipModuleGetGlobal(): %d", HIP_err); - } - - return -1; - } - - return 0; -} - -int hc_hipMemGetInfo (hashcat_ctx_t *hashcat_ctx, size_t *free, size_t *total) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - - const HIPresult HIP_err = hip->hipMemGetInfo (free, total); - - if (HIP_err != HIP_SUCCESS) - { - const char *pStr = NULL; - - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) - { - event_log_error (hashcat_ctx, "hipMemGetInfo(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "hipMemGetInfo(): %d", HIP_err); - } - - return -1; - } - - return 0; -} - -int hc_hipFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, HIPfunction_attribute attrib, HIPfunction hfunc) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - - const HIPresult HIP_err = hip->hipFuncGetAttribute (pi, attrib, hfunc); - - if (HIP_err != HIP_SUCCESS) - { - const char *pStr = NULL; - - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) - { - event_log_error (hashcat_ctx, "hipFuncGetAttribute(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "hipFuncGetAttribute(): %d", HIP_err); - } - - return -1; - } - - return 0; -} - -/* - -// ATTENTION, this one maps to cudaFuncSetAttribute not cuFuncSetAttribute 
!!! - -int hc_hipFuncSetAttribute (hashcat_ctx_t *hashcat_ctx, HIPfunction hfunc, HIPfunction_attribute attrib, int value) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - - const HIPresult HIP_err = hip->hipFuncSetAttribute (hfunc, attrib, value); - - if (HIP_err != HIP_SUCCESS) - { - const char *pStr = NULL; - - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) - { - event_log_error (hashcat_ctx, "hipFuncSetAttribute(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "hipFuncSetAttribute(): %d", HIP_err); - } - - return -1; - } - - return 0; -} -*/ - -int hc_hipStreamCreate (hashcat_ctx_t *hashcat_ctx, HIPstream *phStream, unsigned int Flags) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - - const HIPresult HIP_err = hip->hipStreamCreate (phStream, Flags); - - if (HIP_err != HIP_SUCCESS) - { - const char *pStr = NULL; - - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) - { - event_log_error (hashcat_ctx, "hipStreamCreate(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "hipStreamCreate(): %d", HIP_err); - } - - return -1; - } - - return 0; -} - -int hc_hipStreamDestroy (hashcat_ctx_t *hashcat_ctx, HIPstream hStream) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - - const HIPresult HIP_err = hip->hipStreamDestroy (hStream); - - if (HIP_err != HIP_SUCCESS) - { - const char *pStr = NULL; - - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) - { - event_log_error (hashcat_ctx, "hipStreamDestroy(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "hipStreamDestroy(): %d", HIP_err); - } - - return -1; - } - - return 0; -} - -int hc_hipStreamSynchronize (hashcat_ctx_t *hashcat_ctx, HIPstream hStream) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - - const 
HIPresult HIP_err = hip->hipStreamSynchronize (hStream); - - if (HIP_err != HIP_SUCCESS) - { - const char *pStr = NULL; - - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) - { - event_log_error (hashcat_ctx, "hipStreamSynchronize(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "hipStreamSynchronize(): %d", HIP_err); - } - - return -1; - } - - return 0; -} - -int hc_hipLaunchKernel (hashcat_ctx_t *hashcat_ctx, HIPfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, HIPstream hStream, void **kernelParams, void **extra) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - - const HIPresult HIP_err = hip->hipLaunchKernel (f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra); - - if (HIP_err != HIP_SUCCESS) - { - const char *pStr = NULL; - - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) - { - event_log_error (hashcat_ctx, "hipLaunchKernel(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "hipLaunchKernel(): %d", HIP_err); - } - - return -1; - } - - return 0; -} - -int hc_hipCtxSynchronize (hashcat_ctx_t *hashcat_ctx) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - - const HIPresult HIP_err = hip->hipCtxSynchronize (); - - if (HIP_err != HIP_SUCCESS) - { - const char *pStr = NULL; - - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) - { - event_log_error (hashcat_ctx, "hipCtxSynchronize(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "hipCtxSynchronize(): %d", HIP_err); - } - - return -1; - } - - return 0; -} - -int hc_hipEventCreate (hashcat_ctx_t *hashcat_ctx, HIPevent *phEvent, unsigned int Flags) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - HIP_PTR *hip = (HIP_PTR *) 
backend_ctx->hip; - - const HIPresult HIP_err = hip->hipEventCreate (phEvent, Flags); - - if (HIP_err != HIP_SUCCESS) - { - const char *pStr = NULL; - - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) { event_log_error (hashcat_ctx, "hipEventCreate(): %s", pStr); } @@ -3442,19 +2902,19 @@ int hc_hipEventCreate (hashcat_ctx_t *hashcat_ctx, HIPevent *phEvent, unsigned i return 0; } -int hc_hipEventDestroy (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent) +int hc_hipEventDestroy (hashcat_ctx_t *hashcat_ctx, hipEvent_t hEvent) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - const HIPresult HIP_err = hip->hipEventDestroy (hEvent); + const hipError_t HIP_err = hip->hipEventDestroy (hEvent); - if (HIP_err != HIP_SUCCESS) + if (HIP_err != hipSuccess) { const char *pStr = NULL; - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) { event_log_error (hashcat_ctx, "hipEventDestroy(): %s", pStr); } @@ -3469,19 +2929,19 @@ int hc_hipEventDestroy (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent) return 0; } -int hc_hipEventElapsedTime (hashcat_ctx_t *hashcat_ctx, float *pMilliseconds, HIPevent hStart, HIPevent hEnd) +int hc_hipEventElapsedTime (hashcat_ctx_t *hashcat_ctx, float *pMilliseconds, hipEvent_t hStart, hipEvent_t hEnd) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - const HIPresult HIP_err = hip->hipEventElapsedTime (pMilliseconds, hStart, hEnd); + const hipError_t HIP_err = hip->hipEventElapsedTime (pMilliseconds, hStart, hEnd); - if (HIP_err != HIP_SUCCESS) + if (HIP_err != hipSuccess) { const char *pStr = NULL; - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) { event_log_error (hashcat_ctx, "hipEventElapsedTime(): %s", pStr); } @@ -3496,46 +2956,19 @@ 
int hc_hipEventElapsedTime (hashcat_ctx_t *hashcat_ctx, float *pMilliseconds, HI return 0; } -int hc_hipEventQuery (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent) +int hc_hipEventRecord (hashcat_ctx_t *hashcat_ctx, hipEvent_t hEvent, hipStream_t hStream) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - const HIPresult HIP_err = hip->hipEventQuery (hEvent); + const hipError_t HIP_err = hip->hipEventRecord (hEvent, hStream); - if (HIP_err != HIP_SUCCESS) + if (HIP_err != hipSuccess) { const char *pStr = NULL; - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) - { - event_log_error (hashcat_ctx, "hipEventQuery(): %s", pStr); - } - else - { - event_log_error (hashcat_ctx, "hipEventQuery(): %d", HIP_err); - } - - return -1; - } - - return 0; -} - -int hc_hipEventRecord (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent, HIPstream hStream) -{ - backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; - - HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - - const HIPresult HIP_err = hip->hipEventRecord (hEvent, hStream); - - if (HIP_err != HIP_SUCCESS) - { - const char *pStr = NULL; - - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) { event_log_error (hashcat_ctx, "hipEventRecord(): %s", pStr); } @@ -3550,19 +2983,19 @@ int hc_hipEventRecord (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent, HIPstream hS return 0; } -int hc_hipEventSynchronize (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent) +int hc_hipEventSynchronize (hashcat_ctx_t *hashcat_ctx, hipEvent_t hEvent) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - const HIPresult HIP_err = hip->hipEventSynchronize (hEvent); + const hipError_t HIP_err = hip->hipEventSynchronize (hEvent); - if (HIP_err != HIP_SUCCESS) + if (HIP_err != hipSuccess) { const char *pStr = NULL; - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + if 
(hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) { event_log_error (hashcat_ctx, "hipEventSynchronize(): %s", pStr); } @@ -3577,25 +3010,25 @@ int hc_hipEventSynchronize (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent) return 0; } -int hc_hipCtxSetCacheConfig (hashcat_ctx_t *hashcat_ctx, HIPfunc_cache config) +int hc_hipFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, hipFunction_attribute attrib, hipFunction_t hfunc) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - const HIPresult HIP_err = hip->hipCtxSetCacheConfig (config); + const hipError_t HIP_err = hip->hipFuncGetAttribute (pi, attrib, hfunc); - if (HIP_err != HIP_SUCCESS) + if (HIP_err != hipSuccess) { const char *pStr = NULL; - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) { - event_log_error (hashcat_ctx, "hipCtxSetCacheConfig(): %s", pStr); + event_log_error (hashcat_ctx, "hipFuncGetAttribute(): %s", pStr); } else { - event_log_error (hashcat_ctx, "hipCtxSetCacheConfig(): %d", HIP_err); + event_log_error (hashcat_ctx, "hipFuncGetAttribute(): %d", HIP_err); } return -1; @@ -3604,25 +3037,25 @@ int hc_hipCtxSetCacheConfig (hashcat_ctx_t *hashcat_ctx, HIPfunc_cache config) return 0; } -int hc_hipCtxPushCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx) +int hc_hipLaunchKernel (hashcat_ctx_t *hashcat_ctx, hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, hipStream_t hStream, void **kernelParams, void **extra) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - const HIPresult HIP_err = hip->hipCtxPushCurrent (ctx); + const hipError_t HIP_err = hip->hipLaunchKernel (f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra); - if 
(HIP_err != HIP_SUCCESS) + if (HIP_err != hipSuccess) { const char *pStr = NULL; - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) { - event_log_error (hashcat_ctx, "hipCtxPushCurrent(): %s", pStr); + event_log_error (hashcat_ctx, "hipLaunchKernel(): %s", pStr); } else { - event_log_error (hashcat_ctx, "hipCtxPushCurrent(): %d", HIP_err); + event_log_error (hashcat_ctx, "hipLaunchKernel(): %d", HIP_err); } return -1; @@ -3631,25 +3064,25 @@ int hc_hipCtxPushCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx) return 0; } -int hc_hipCtxPopCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext *pctx) +int hc_hipInit (hashcat_ctx_t *hashcat_ctx, unsigned int Flags) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - const HIPresult HIP_err = hip->hipCtxPopCurrent (pctx); + const hipError_t HIP_err = hip->hipInit (Flags); - if (HIP_err != HIP_SUCCESS) + if (HIP_err != hipSuccess) { const char *pStr = NULL; - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) { - event_log_error (hashcat_ctx, "hipCtxPopCurrent(): %s", pStr); + event_log_error (hashcat_ctx, "hipInit(): %s", pStr); } else { - event_log_error (hashcat_ctx, "hipCtxPopCurrent(): %d", HIP_err); + event_log_error (hashcat_ctx, "hipInit(): %d", HIP_err); } return -1; @@ -3658,25 +3091,25 @@ int hc_hipCtxPopCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext *pctx) return 0; } -int hc_hipLinkCreate (hashcat_ctx_t *hashcat_ctx, unsigned int numOptions, HIPjit_option *options, void **optionValues, HIPlinkState *stateOut) +int hc_hipMemAlloc (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t *dptr, size_t bytesize) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - const HIPresult HIP_err = hip->hipLinkCreate (numOptions, options, optionValues, stateOut); + const hipError_t HIP_err = 
hip->hipMemAlloc (dptr, bytesize); - if (HIP_err != HIP_SUCCESS) + if (HIP_err != hipSuccess) { const char *pStr = NULL; - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) { - event_log_error (hashcat_ctx, "hipLinkCreate(): %s", pStr); + event_log_error (hashcat_ctx, "hipMemAlloc(): %s", pStr); } else { - event_log_error (hashcat_ctx, "hipLinkCreate(): %d", HIP_err); + event_log_error (hashcat_ctx, "hipMemAlloc(): %d", HIP_err); } return -1; @@ -3685,25 +3118,25 @@ int hc_hipLinkCreate (hashcat_ctx_t *hashcat_ctx, unsigned int numOptions, HIPji return 0; } -int hc_hipLinkAddData (hashcat_ctx_t *hashcat_ctx, HIPlinkState state, HIPjitInputType type, void *data, size_t size, const char *name, unsigned int numOptions, HIPjit_option *options, void **optionValues) +int hc_hipMemFree (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t dptr) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - const HIPresult HIP_err = hip->hipLinkAddData (state, type, data, size, name, numOptions, options, optionValues); + const hipError_t HIP_err = hip->hipMemFree (dptr); - if (HIP_err != HIP_SUCCESS) + if (HIP_err != hipSuccess) { const char *pStr = NULL; - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) { - event_log_error (hashcat_ctx, "hipLinkAddData(): %s", pStr); + event_log_error (hashcat_ctx, "hipMemFree(): %s", pStr); } else { - event_log_error (hashcat_ctx, "hipLinkAddData(): %d", HIP_err); + event_log_error (hashcat_ctx, "hipMemFree(): %d", HIP_err); } return -1; @@ -3712,25 +3145,25 @@ int hc_hipLinkAddData (hashcat_ctx_t *hashcat_ctx, HIPlinkState state, HIPjitInp return 0; } -int hc_hipLinkDestroy (hashcat_ctx_t *hashcat_ctx, HIPlinkState state) +int hc_hipMemGetInfo (hashcat_ctx_t *hashcat_ctx, size_t *free, size_t *total) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; HIP_PTR 
*hip = (HIP_PTR *) backend_ctx->hip; - const HIPresult HIP_err = hip->hipLinkDestroy (state); + const hipError_t HIP_err = hip->hipMemGetInfo (free, total); - if (HIP_err != HIP_SUCCESS) + if (HIP_err != hipSuccess) { const char *pStr = NULL; - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) { - event_log_error (hashcat_ctx, "hipLinkDestroy(): %s", pStr); + event_log_error (hashcat_ctx, "hipMemGetInfo(): %s", pStr); } else { - event_log_error (hashcat_ctx, "hipLinkDestroy(): %d", HIP_err); + event_log_error (hashcat_ctx, "hipMemGetInfo(): %d", HIP_err); } return -1; @@ -3739,25 +3172,349 @@ int hc_hipLinkDestroy (hashcat_ctx_t *hashcat_ctx, HIPlinkState state) return 0; } -int hc_hipLinkComplete (hashcat_ctx_t *hashcat_ctx, HIPlinkState state, void **hipbinOut, size_t *sizeOut) +int hc_hipMemcpyDtoH (hashcat_ctx_t *hashcat_ctx, void *dstHost, hipDeviceptr_t srcDevice, size_t ByteCount) { backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; - const HIPresult HIP_err = hip->hipLinkComplete (state, hipbinOut, sizeOut); + const hipError_t HIP_err = hip->hipMemcpyDtoH (dstHost, srcDevice, ByteCount); - if (HIP_err != HIP_SUCCESS) + if (HIP_err != hipSuccess) { const char *pStr = NULL; - if (hip->hipGetErrorString (HIP_err, &pStr) == HIP_SUCCESS) + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) { - event_log_error (hashcat_ctx, "hipLinkComplete(): %s", pStr); + event_log_error (hashcat_ctx, "hipMemcpyDtoH(): %s", pStr); } else { - event_log_error (hashcat_ctx, "hipLinkComplete(): %d", HIP_err); + event_log_error (hashcat_ctx, "hipMemcpyDtoH(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipMemcpyDtoHAsync (hashcat_ctx_t *hashcat_ctx, void *dstHost, hipDeviceptr_t srcDevice, size_t ByteCount, hipStream_t hStream) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + 
+ const hipError_t HIP_err = hip->hipMemcpyDtoHAsync (dstHost, srcDevice, ByteCount, hStream); + + if (HIP_err != hipSuccess) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) + { + event_log_error (hashcat_ctx, "hipMemcpyDtoHAsync(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipMemcpyDtoHAsync(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipMemcpyDtoD (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t dstDevice, hipDeviceptr_t srcDevice, size_t ByteCount) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const hipError_t HIP_err = hip->hipMemcpyDtoD (dstDevice, srcDevice, ByteCount); + + if (HIP_err != hipSuccess) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) + { + event_log_error (hashcat_ctx, "hipMemcpyDtoD(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipMemcpyDtoD(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipMemcpyDtoDAsync (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t dstDevice, hipDeviceptr_t srcDevice, size_t ByteCount, hipStream_t hStream) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const hipError_t HIP_err = hip->hipMemcpyDtoDAsync (dstDevice, srcDevice, ByteCount, hStream); + + if (HIP_err != hipSuccess) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) + { + event_log_error (hashcat_ctx, "hipMemcpyDtoDAsync(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipMemcpyDtoDAsync(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipMemcpyHtoD (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t dstDevice, const void *srcHost, size_t ByteCount) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const hipError_t HIP_err = hip->hipMemcpyHtoD 
(dstDevice, srcHost, ByteCount); + + if (HIP_err != hipSuccess) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) + { + event_log_error (hashcat_ctx, "hipMemcpyHtoD(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipMemcpyHtoD(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipMemcpyHtoDAsync (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t dstDevice, const void *srcHost, size_t ByteCount, hipStream_t hStream) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const hipError_t HIP_err = hip->hipMemcpyHtoDAsync (dstDevice, srcHost, ByteCount, hStream); + + if (HIP_err != hipSuccess) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) + { + event_log_error (hashcat_ctx, "hipMemcpyHtoDAsync(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipMemcpyHtoDAsync(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipModuleGetFunction (hashcat_ctx_t *hashcat_ctx, hipFunction_t *hfunc, hipModule_t hmod, const char *name) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const hipError_t HIP_err = hip->hipModuleGetFunction (hfunc, hmod, name); + + if (HIP_err != hipSuccess) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) + { + event_log_error (hashcat_ctx, "hipModuleGetFunction(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipModuleGetFunction(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipModuleGetGlobal (hashcat_ctx_t *hashcat_ctx, hipDeviceptr_t *dptr, size_t *bytes, hipModule_t hmod, const char *name) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const hipError_t HIP_err = hip->hipModuleGetGlobal (dptr, bytes, hmod, name); + + if (HIP_err != hipSuccess) + 
{ + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) + { + event_log_error (hashcat_ctx, "hipModuleGetGlobal(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipModuleGetGlobal(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipModuleLoadDataEx (hashcat_ctx_t *hashcat_ctx, hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const hipError_t HIP_err = hip->hipModuleLoadDataEx (module, image, numOptions, options, optionValues); + + if (HIP_err != hipSuccess) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) + { + event_log_error (hashcat_ctx, "hipModuleLoadDataEx(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipModuleLoadDataEx(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipModuleUnload (hashcat_ctx_t *hashcat_ctx, hipModule_t hmod) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const hipError_t HIP_err = hip->hipModuleUnload (hmod); + + if (HIP_err != hipSuccess) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) + { + event_log_error (hashcat_ctx, "hipModuleUnload(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipModuleUnload(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipStreamCreate (hashcat_ctx_t *hashcat_ctx, hipStream_t *phStream, unsigned int Flags) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const hipError_t HIP_err = hip->hipStreamCreate (phStream, Flags); + + if (HIP_err != hipSuccess) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) + { + event_log_error (hashcat_ctx, 
"hipStreamCreate(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipStreamCreate(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipStreamDestroy (hashcat_ctx_t *hashcat_ctx, hipStream_t hStream) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const hipError_t HIP_err = hip->hipStreamDestroy (hStream); + + if (HIP_err != hipSuccess) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) + { + event_log_error (hashcat_ctx, "hipStreamDestroy(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipStreamDestroy(): %d", HIP_err); + } + + return -1; + } + + return 0; +} + +int hc_hipStreamSynchronize (hashcat_ctx_t *hashcat_ctx, hipStream_t hStream) +{ + backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx; + + HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip; + + const hipError_t HIP_err = hip->hipStreamSynchronize (hStream); + + if (HIP_err != hipSuccess) + { + const char *pStr = NULL; + + if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess) + { + event_log_error (hashcat_ctx, "hipStreamSynchronize(): %s", pStr); + } + else + { + event_log_error (hashcat_ctx, "hipStreamSynchronize(): %d", HIP_err); } return -1; @@ -5245,7 +5002,7 @@ int run_cuda_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device return 0; } -int run_hip_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 num) +int run_hip_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 num) { u64 num_elements = num; @@ -5256,7 +5013,7 @@ int run_hip_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device num_elements = CEILDIV (num_elements, kernel_threads); - HIPfunction function = device_param->hip_function_atinit; + hipFunction_t function = device_param->hip_function_atinit; if (hc_hipLaunchKernel (hashcat_ctx, function, 
num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_atinit, NULL) == -1) return -1; @@ -5265,7 +5022,7 @@ int run_hip_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device return 0; } -int run_hip_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 num) +int run_hip_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 num) { u64 num_elements = num; @@ -5276,7 +5033,7 @@ int run_hip_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t num_elements = CEILDIV (num_elements, kernel_threads); - HIPfunction function = device_param->hip_function_utf8toutf16le; + hipFunction_t function = device_param->hip_function_utf8toutf16le; if (hc_hipLaunchKernel (hashcat_ctx, function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_utf8toutf16le, NULL) == -1) return -1; @@ -5285,7 +5042,7 @@ int run_hip_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t return 0; } -int run_hip_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u32 value, const u64 size) +int run_hip_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u32 value, const u64 size) { const u64 num16d = size / 16; const u64 num16m = size % 16; @@ -5301,7 +5058,7 @@ int run_hip_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device u64 num_elements = CEILDIV (num16d, kernel_threads); - HIPfunction function = device_param->hip_function_memset; + hipFunction_t function = device_param->hip_function_memset; if (hc_hipLaunchKernel (hashcat_ctx, function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_memset, NULL) == -1) return -1; } @@ -5321,7 +5078,7 @@ int run_hip_kernel_memset (hashcat_ctx_t 
*hashcat_ctx, hc_device_param_t *device return 0; } -int run_hip_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 size) +int run_hip_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 size) { const u64 num16d = size / 16; const u64 num16m = size % 16; @@ -5336,7 +5093,7 @@ int run_hip_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_ u64 num_elements = CEILDIV(num16d, kernel_threads); - HIPfunction function = device_param->hip_function_bzero; + hipFunction_t function = device_param->hip_function_bzero; if (hc_hipLaunchKernel (hashcat_ctx, function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_bzero, NULL) == -1) return -1; } @@ -5686,7 +5443,7 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con if (device_param->is_hip == true) { - HIPfunction hip_function = NULL; + hipFunction_t hip_function = NULL; if (device_param->is_hip == true) { @@ -6014,7 +5771,7 @@ int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (device_param->is_hip == true) { - HIPfunction hip_function = NULL; + hipFunction_t hip_function = NULL; void **hip_args = NULL; @@ -6107,7 +5864,7 @@ int run_kernel_tm (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param) if (device_param->is_hip == true) { - HIPfunction hip_function = device_param->hip_function_tm; + hipFunction_t hip_function = device_param->hip_function_tm; if (hc_hipLaunchKernel (hashcat_ctx, hip_function, num_elements / kernel_threads, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_tm, NULL) == -1) return -1; @@ -6154,7 +5911,7 @@ int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, { num_elements = CEILDIV (num_elements, kernel_threads); - HIPfunction hip_function = device_param->hip_function_amp; + hipFunction_t hip_function 
= device_param->hip_function_amp; if (hc_hipLaunchKernel (hashcat_ctx, hip_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_amp, NULL) == -1) return -1; @@ -6205,7 +5962,7 @@ int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device { num_elements = CEILDIV (num_elements, kernel_threads); - HIPfunction hip_function = device_param->hip_function_decompress; + hipFunction_t hip_function = device_param->hip_function_decompress; if (hc_hipLaunchKernel (hashcat_ctx, hip_function, num_elements, 1, 1, kernel_threads, 1, 1, 0, device_param->hip_stream, device_param->kernel_params_decompress, NULL) == -1) return -1; @@ -7407,38 +7164,17 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx) if ((rc_hip_init == 0) && (rc_hiprtc_init == 0)) { - // hiprtc version - - int hiprtc_major = 0; - int hiprtc_minor = 0; - - if (hc_hiprtcVersion (hashcat_ctx, &hiprtc_major, &hiprtc_minor) == -1) return -1; - - int hiprtc_driver_version = (hiprtc_major * 1000) + (hiprtc_minor * 10); - - backend_ctx->hiprtc_driver_version = hiprtc_driver_version; - - if (hiprtc_driver_version < 9000) - { - event_log_error (hashcat_ctx, "Outdated AMD HIPRTC driver version '%d' detected!", hiprtc_driver_version); - - event_log_warning (hashcat_ctx, "See hashcat.net for officially supported AMD HIP versions."); - event_log_warning (hashcat_ctx, NULL); - - return -1; - } - // hip version - int hip_driver_version = 10000; + int hip_driverVersion; - //if (hc_hipDriverGetVersion (hashcat_ctx, &hip_driver_version) == -1) return -1; + if (hc_hipDriverGetVersion (hashcat_ctx, &hip_driverVersion) == -1) return -1; - backend_ctx->hip_driver_version = hip_driver_version; + backend_ctx->hip_driverVersion = hip_driverVersion; - if (hip_driver_version < 9000) + if (hip_driverVersion < 404) { - event_log_error (hashcat_ctx, "Outdated AMD HIP driver version '%d' detected!", hip_driver_version); + event_log_error (hashcat_ctx, "Outdated AMD HIP 
driver version '%d' detected!", hip_driverVersion); event_log_warning (hashcat_ctx, "See hashcat.net for officially supported AMD HIP versions."); event_log_warning (hashcat_ctx, NULL); @@ -7488,10 +7224,10 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx) #if defined (__linux__) event_log_warning (hashcat_ctx, "* AMD GPUs on Linux require this driver:"); - event_log_warning (hashcat_ctx, " \"RadeonOpenCompute (ROCm)\" Software Platform (3.1 or later)"); + event_log_warning (hashcat_ctx, " \"AMD ROCm\" (4.4 or later)"); #elif defined (_WIN) event_log_warning (hashcat_ctx, "* AMD GPUs on Windows require this driver:"); - event_log_warning (hashcat_ctx, " \"AMD Radeon Adrenalin 2020 Edition\" (20.2.2 or later)"); + event_log_warning (hashcat_ctx, " \"AMD Radeon Adrenalin 2020 Edition\" (21.2.1 or later)"); #endif event_log_warning (hashcat_ctx, "* Intel CPUs require this runtime:"); @@ -7813,10 +7549,10 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx) #if defined (__linux__) event_log_warning (hashcat_ctx, "* AMD GPUs on Linux require this driver:"); - event_log_warning (hashcat_ctx, " \"RadeonOpenCompute (ROCm)\" Software Platform (3.1 or later)"); + event_log_warning (hashcat_ctx, " \"AMD ROCm\" (4.4 or later)"); #elif defined (_WIN) event_log_warning (hashcat_ctx, "* AMD GPUs on Windows require this driver:"); - event_log_warning (hashcat_ctx, " \"AMD Radeon Adrenalin 2020 Edition\" (20.2.2 or later)"); + event_log_warning (hashcat_ctx, " \"AMD Radeon Adrenalin 2020 Edition\" (21.2.1 or later)"); #endif event_log_warning (hashcat_ctx, "* Intel CPUs require this runtime:"); @@ -8289,7 +8025,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) backend_ctx->backend_device_from_hip[hip_devices_idx] = backend_devices_idx; - HIPdevice hip_device; + hipDevice_t hip_device; if (hc_hipDeviceGet (hashcat_ctx, &hip_device, hip_devices_idx) == -1) { @@ -8328,7 +8064,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int 
comptime) int device_processors = 0; - if (hc_hipDeviceGetAttribute (hashcat_ctx, &device_processors, HIP_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, hip_device) == -1) + if (hc_hipDeviceGetAttribute (hashcat_ctx, &device_processors, hipDeviceAttributeMultiprocessorCount, hip_device) == -1) { device_param->skipped = true; continue; @@ -8356,7 +8092,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) int hip_warp_size = 0; - if (hc_hipDeviceGetAttribute (hashcat_ctx, &hip_warp_size, HIP_DEVICE_ATTRIBUTE_WARP_SIZE, hip_device) == -1) + if (hc_hipDeviceGetAttribute (hashcat_ctx, &hip_warp_size, hipDeviceAttributeWarpSize, hip_device) == -1) { device_param->skipped = true; continue; @@ -8369,13 +8105,13 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) int sm_major = 0; int sm_minor = 0; - if (hc_hipDeviceGetAttribute (hashcat_ctx, &sm_major, HIP_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hip_device) == -1) + if (hc_hipDeviceGetAttribute (hashcat_ctx, &sm_major, hipDeviceAttributeComputeCapabilityMajor, hip_device) == -1) { device_param->skipped = true; continue; } - if (hc_hipDeviceGetAttribute (hashcat_ctx, &sm_minor, HIP_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, hip_device) == -1) + if (hc_hipDeviceGetAttribute (hashcat_ctx, &sm_minor, hipDeviceAttributeComputeCapabilityMinor, hip_device) == -1) { device_param->skipped = true; continue; @@ -8388,7 +8124,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) int device_maxworkgroup_size = 0; - if (hc_hipDeviceGetAttribute (hashcat_ctx, &device_maxworkgroup_size, HIP_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, hip_device) == -1) + if (hc_hipDeviceGetAttribute (hashcat_ctx, &device_maxworkgroup_size, hipDeviceAttributeMaxThreadsPerBlock, hip_device) == -1) { device_param->skipped = true; continue; @@ -8400,7 +8136,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) int device_maxclock_frequency = 0; - if 
(hc_hipDeviceGetAttribute (hashcat_ctx, &device_maxclock_frequency, HIP_DEVICE_ATTRIBUTE_CLOCK_RATE, hip_device) == -1) + if (hc_hipDeviceGetAttribute (hashcat_ctx, &device_maxclock_frequency, hipDeviceAttributeClockRate, hip_device) == -1) { device_param->skipped = true; continue; @@ -8414,19 +8150,20 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) int pci_bus_id_nv = 0; int pci_slot_id_nv = 0; - if (hc_hipDeviceGetAttribute (hashcat_ctx, &pci_domain_id_nv, HIP_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, hip_device) == -1) + // Not supported by HIP + //if (hc_hipDeviceGetAttribute (hashcat_ctx, &pci_domain_id_nv, hipDeviceAttributePciDomainID, hip_device) == -1) + //{ + // device_param->skipped = true; + // continue; + //} + + if (hc_hipDeviceGetAttribute (hashcat_ctx, &pci_bus_id_nv, hipDeviceAttributePciBusId, hip_device) == -1) { device_param->skipped = true; continue; } - if (hc_hipDeviceGetAttribute (hashcat_ctx, &pci_bus_id_nv, HIP_DEVICE_ATTRIBUTE_PCI_BUS_ID, hip_device) == -1) - { - device_param->skipped = true; - continue; - } - - if (hc_hipDeviceGetAttribute (hashcat_ctx, &pci_slot_id_nv, HIP_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, hip_device) == -1) + if (hc_hipDeviceGetAttribute (hashcat_ctx, &pci_slot_id_nv, hipDeviceAttributePciDeviceId, hip_device) == -1) { device_param->skipped = true; continue; @@ -8434,6 +8171,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) device_param->pcie_domain = (u8) (pci_domain_id_nv); device_param->pcie_bus = (u8) (pci_bus_id_nv); + device_param->pcie_device = (u8) (pci_slot_id_nv >> 3); device_param->pcie_function = (u8) (pci_slot_id_nv & 7); @@ -8441,7 +8179,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) int kernel_exec_timeout = 0; - if (hc_hipDeviceGetAttribute (hashcat_ctx, &kernel_exec_timeout, HIP_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, hip_device) == -1) + if (hc_hipDeviceGetAttribute (hashcat_ctx, &kernel_exec_timeout, 
hipDeviceAttributeKernelExecTimeout, hip_device) == -1) { device_param->skipped = true; continue; @@ -8453,7 +8191,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) int warp_size = 0; - if (hc_hipDeviceGetAttribute (hashcat_ctx, &warp_size, HIP_DEVICE_ATTRIBUTE_WARP_SIZE, hip_device) == -1) + if (hc_hipDeviceGetAttribute (hashcat_ctx, &warp_size, hipDeviceAttributeWarpSize, hip_device) == -1) { device_param->skipped = true; continue; @@ -8465,7 +8203,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) int max_shared_memory_per_block = 0; - if (hc_hipDeviceGetAttribute (hashcat_ctx, &max_shared_memory_per_block, HIP_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN, hip_device) == -1) + if (hc_hipDeviceGetAttribute (hashcat_ctx, &max_shared_memory_per_block, hipDeviceAttributeMaxSharedMemoryPerBlock, hip_device) == -1) { device_param->skipped = true; continue; @@ -8484,7 +8222,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) int device_max_constant_buffer_size = 0; - if (hc_hipDeviceGetAttribute (hashcat_ctx, &device_max_constant_buffer_size, HIP_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, hip_device) == -1) + if (hc_hipDeviceGetAttribute (hashcat_ctx, &device_max_constant_buffer_size, hipDeviceAttributeTotalConstantMemory, hip_device) == -1) { device_param->skipped = true; continue; @@ -8577,13 +8315,6 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) // instruction set - // bcrypt optimization? 
- //const int rc_cuCtxSetCacheConfig = hc_hipCtxSetCacheConfig (hashcat_ctx, HIP_FUNC_CACHE_PREFER_SHARED); - // - //if (rc_cuCtxSetCacheConfig == -1) return -1; - - // const int sm = (device_param->sm_major * 10) + device_param->sm_minor; - device_param->has_add = false; device_param->has_addc = false; device_param->has_sub = false; @@ -8595,9 +8326,9 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) // device_available_mem - HIPcontext hip_context; + hipCtx_t hip_context; - if (hc_hipCtxCreate (hashcat_ctx, &hip_context, HIP_CTX_SCHED_BLOCKING_SYNC, device_param->hip_device) == -1) + if (hc_hipCtxCreate (hashcat_ctx, &hip_context, hipDeviceScheduleBlockingSync, device_param->hip_device) == -1) { device_param->skipped = true; continue; @@ -9366,6 +9097,19 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { + if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)) + { + // from https://www.khronos.org/registry/OpenCL/extensions/amd/cl_amd_device_attribute_query.txt + #define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043 + + // crazy, but apple does not support this query! + // the best alternative is "Preferred work group size multiple (kernel)", but it requires specifying a kernel. + // so we will set kernel_preferred_wgs_multiple intentionally to 0 because otherwise it is set to 8 by default. + // we then assign kernel_preferred_wgs_multiple the value from a small kernel like bzero after testing whether it was set to 0. 
+ + device_param->kernel_preferred_wgs_multiple = 0; + } + if ((device_param->opencl_platform_vendor_id == VENDOR_ID_AMD) && (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)) { cl_uint device_wavefront_width_amd; @@ -10284,7 +10028,7 @@ static int get_cuda_kernel_local_mem_size (hashcat_ctx_t *hashcat_ctx, CUfunctio return 0; } -static int get_hip_kernel_wgs (hashcat_ctx_t *hashcat_ctx, HIPfunction function, u32 *result) +static int get_hip_kernel_wgs (hashcat_ctx_t *hashcat_ctx, hipFunction_t function, u32 *result) { int max_threads_per_block; @@ -10295,7 +10039,7 @@ static int get_hip_kernel_wgs (hashcat_ctx_t *hashcat_ctx, HIPfunction function, return 0; } -static int get_hip_kernel_local_mem_size (hashcat_ctx_t *hashcat_ctx, HIPfunction function, u64 *result) +static int get_hip_kernel_local_mem_size (hashcat_ctx_t *hashcat_ctx, hipFunction_t function, u64 *result) { int shared_size_bytes; @@ -10398,11 +10142,21 @@ static u32 get_kernel_threads (const hc_device_param_t *device_param) } else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) { - kernel_threads_max = MIN (kernel_threads_max, device_param->kernel_preferred_wgs_multiple); + if (device_param->kernel_preferred_wgs_multiple == 64) + { + // only older AMD GPUs with WaveFront size 64 benefit from this + + kernel_threads_max = MIN (kernel_threads_max, device_param->kernel_preferred_wgs_multiple); + } } else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) { - kernel_threads_max = MIN (kernel_threads_max, device_param->kernel_preferred_wgs_multiple); + if (device_param->kernel_preferred_wgs_multiple == 64) + { + // only older AMD GPUs with WaveFront size 64 benefit from this + + kernel_threads_max = MIN (kernel_threads_max, device_param->kernel_preferred_wgs_multiple); + } } } @@ -10414,7 +10168,7 @@ static u32 get_kernel_threads (const hc_device_param_t *device_param) return kernel_threads; } -static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t 
*device_param, const char *kernel_name, char *source_file, char *cached_file, const char *build_options_buf, const bool cache_disable, cl_program *opencl_program, CUmodule *cuda_module, HIPmodule *hip_module) +static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const char *kernel_name, char *source_file, char *cached_file, const char *build_options_buf, const bool cache_disable, cl_program *opencl_program, CUmodule *cuda_module, hipModule_t *hip_module) { const hashconfig_t *hashconfig = hashcat_ctx->hashconfig; const folder_config_t *folder_config = hashcat_ctx->folder_config; @@ -10706,14 +10460,12 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p //hiprtc_options[1] = "--device-as-default-execution-space"; //hiprtc_options[2] = "--gpu-architecture"; - hc_asprintf (&hiprtc_options[0], "--gpu-max-threads-per-block=%d", (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_preferred_wgs_multiple); + hc_asprintf (&hiprtc_options[0], "--gpu-max-threads-per-block=%d", (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : ((device_param->kernel_preferred_wgs_multiple == 64) ? 
64 : KERNEL_THREADS_MAX)); - //hiprtc_options[0] = "--gpu-max-threads-per-block=64"; hiprtc_options[1] = "-nocudainc"; hiprtc_options[2] = "-nocudalib"; hiprtc_options[3] = ""; hiprtc_options[4] = ""; - hiprtc_options[5] = "-I"; hiprtc_options[6] = folder_config->cpath_real; @@ -10776,137 +10528,27 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p int mod_cnt = 6; - HIPjit_option mod_opts[7]; - void *mod_vals[7]; + hipJitOption mod_opts[6]; + void *mod_vals[6]; - mod_opts[0] = HIP_JIT_TARGET_FROM_HIPCONTEXT; + mod_opts[0] = hipJitOptionTargetFromContext; mod_vals[0] = (void *) 0; - mod_opts[1] = HIP_JIT_LOG_VERBOSE; + mod_opts[1] = hipJitOptionLogVerbose; mod_vals[1] = (void *) 1; - mod_opts[2] = HIP_JIT_INFO_LOG_BUFFER; + mod_opts[2] = hipJitOptionInfoLogBuffer; mod_vals[2] = (void *) mod_info_log; - mod_opts[3] = HIP_JIT_INFO_LOG_BUFFER_SIZE_BYTES; + mod_opts[3] = hipJitOptionInfoLogBufferSizeBytes; mod_vals[3] = (void *) LOG_SIZE; - mod_opts[4] = HIP_JIT_ERROR_LOG_BUFFER; + mod_opts[4] = hipJitOptionErrorLogBuffer; mod_vals[4] = (void *) mod_error_log; - mod_opts[5] = HIP_JIT_ERROR_LOG_BUFFER_SIZE_BYTES; + mod_opts[5] = hipJitOptionErrorLogBufferSizeBytes; mod_vals[5] = (void *) LOG_SIZE; - if (hashconfig->opti_type & OPTI_TYPE_REGISTER_LIMIT) - { - mod_opts[6] = HIP_JIT_MAX_REGISTERS; - mod_vals[6] = (void *) 128; - - mod_cnt++; - } - - #if defined (WITH_HIPBIN) - - char *jit_info_log = (char *) hcmalloc (LOG_SIZE + 1); - char *jit_error_log = (char *) hcmalloc (LOG_SIZE + 1); - - int jit_cnt = 6; - - HIPjit_option jit_opts[7]; - void *jit_vals[7]; - - jit_opts[0] = HIP_JIT_TARGET_FROM_HIPCONTEXT; - jit_vals[0] = (void *) 0; - - jit_opts[1] = HIP_JIT_LOG_VERBOSE; - jit_vals[1] = (void *) 1; - - jit_opts[2] = HIP_JIT_INFO_LOG_BUFFER; - jit_vals[2] = (void *) jit_info_log; - - jit_opts[3] = HIP_JIT_INFO_LOG_BUFFER_SIZE_BYTES; - jit_vals[3] = (void *) LOG_SIZE; - - jit_opts[4] = HIP_JIT_ERROR_LOG_BUFFER; - jit_vals[4] = (void *) 
jit_error_log; - - jit_opts[5] = HIP_JIT_ERROR_LOG_BUFFER_SIZE_BYTES; - jit_vals[5] = (void *) LOG_SIZE; - - if (hashconfig->opti_type & OPTI_TYPE_REGISTER_LIMIT) - { - jit_opts[6] = HIP_JIT_MAX_REGISTERS; - jit_vals[6] = (void *) 128; - - jit_cnt++; - } - - HIPlinkState state; - - if (hc_cuLinkCreate (hashcat_ctx, jit_cnt, jit_opts, jit_vals, &state) == -1) - { - event_log_error (hashcat_ctx, "* Device #%u: Kernel %s link failed. Error Log:", device_param->device_id + 1, source_file); - event_log_error (hashcat_ctx, "%s", jit_error_log); - event_log_error (hashcat_ctx, NULL); - - return false; - } - - if (hc_cuLinkAddData (hashcat_ctx, state, HIP_JIT_INPUT_CODE, binary, binary_size, kernel_name, 0, NULL, NULL) == -1) - { - event_log_error (hashcat_ctx, "* Device #%u: Kernel %s link failed. Error Log:", device_param->device_id + 1, source_file); - event_log_error (hashcat_ctx, "%s", jit_error_log); - event_log_error (hashcat_ctx, NULL); - - return false; - } - - void *cubin = NULL; - - size_t cubin_size = 0; - - if (hc_cuLinkComplete (hashcat_ctx, state, &cubin, &cubin_size) == -1) - { - event_log_error (hashcat_ctx, "* Device #%u: Kernel %s link failed. Error Log:", device_param->device_id + 1, source_file); - event_log_error (hashcat_ctx, "%s", jit_error_log); - event_log_error (hashcat_ctx, NULL); - - return false; - } - - #if defined (DEBUG) - event_log_info (hashcat_ctx, "* Device #%u: Kernel %s link successful. Info Log:", device_param->device_id + 1, source_file); - event_log_info (hashcat_ctx, "%s", jit_info_log); - event_log_info (hashcat_ctx, NULL); - #endif - - if (hc_cuModuleLoadDataEx (hashcat_ctx, hip_module, cubin, mod_cnt, mod_opts, mod_vals) == -1) - { - event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. 
Error Log:", device_param->device_id + 1, source_file); - event_log_error (hashcat_ctx, "%s", mod_error_log); - event_log_error (hashcat_ctx, NULL); - - return false; - } - - #if defined (DEBUG) - event_log_info (hashcat_ctx, "* Device #%u: Kernel %s load successful. Info Log:", device_param->device_id + 1, source_file); - event_log_info (hashcat_ctx, "%s", mod_info_log); - event_log_info (hashcat_ctx, NULL); - #endif - - if (cache_disable == false) - { - if (write_kernel_binary (hashcat_ctx, cached_file, cubin, cubin_size) == false) return false; - } - - if (hc_hipLinkDestroy (hashcat_ctx, state) == -1) return false; - - hcfree (jit_info_log); - hcfree (jit_error_log); - - #else - if (hc_hipModuleLoadDataEx (hashcat_ctx, hip_module, binary, mod_cnt, mod_opts, mod_vals) == -1) { event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. Error Log:", device_param->device_id + 1, source_file); @@ -10927,8 +10569,6 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p if (write_kernel_binary (hashcat_ctx, cached_file, binary, binary_size) == false) return false; } - #endif - hcfree (mod_info_log); hcfree (mod_error_log); @@ -11076,35 +10716,27 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p int mod_cnt = 6; - HIPjit_option mod_opts[7]; - void *mod_vals[7]; + hipJitOption mod_opts[6]; + void *mod_vals[6]; - mod_opts[0] = HIP_JIT_TARGET_FROM_HIPCONTEXT; + mod_opts[0] = hipJitOptionTargetFromContext; mod_vals[0] = (void *) 0; - mod_opts[1] = HIP_JIT_LOG_VERBOSE; + mod_opts[1] = hipJitOptionLogVerbose; mod_vals[1] = (void *) 1; - mod_opts[2] = HIP_JIT_INFO_LOG_BUFFER; + mod_opts[2] = hipJitOptionInfoLogBuffer; mod_vals[2] = (void *) mod_info_log; - mod_opts[3] = HIP_JIT_INFO_LOG_BUFFER_SIZE_BYTES; + mod_opts[3] = hipJitOptionInfoLogBufferSizeBytes; mod_vals[3] = (void *) LOG_SIZE; - mod_opts[4] = HIP_JIT_ERROR_LOG_BUFFER; + mod_opts[4] = hipJitOptionErrorLogBuffer; mod_vals[4] = (void *) 
mod_error_log; - mod_opts[5] = HIP_JIT_ERROR_LOG_BUFFER_SIZE_BYTES; + mod_opts[5] = hipJitOptionErrorLogBufferSizeBytes; mod_vals[5] = (void *) LOG_SIZE; - if (hashconfig->opti_type & OPTI_TYPE_REGISTER_LIMIT) - { - mod_opts[6] = HIP_JIT_MAX_REGISTERS; - mod_vals[6] = (void *) 128; - - mod_cnt++; - } - if (hc_hipModuleLoadDataEx (hashcat_ctx, hip_module, kernel_sources[0], mod_cnt, mod_opts, mod_vals) == -1) { event_log_error (hashcat_ctx, "* Device #%u: Kernel %s load failed. Error Log:", device_param->device_id + 1, source_file); @@ -11495,7 +11127,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (device_param->is_hip == true) { - if (hc_hipCtxCreate (hashcat_ctx, &device_param->hip_context, HIP_CTX_SCHED_BLOCKING_SYNC, device_param->hip_device) == -1) + if (hc_hipCtxCreate (hashcat_ctx, &device_param->hip_context, hipDeviceScheduleBlockingSync, device_param->hip_device) == -1) { device_param->skipped = true; continue; @@ -11559,7 +11191,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (device_param->is_hip == true) { - if (hc_hipStreamCreate (hashcat_ctx, &device_param->hip_stream, HIP_STREAM_DEFAULT) == -1) + if (hc_hipStreamCreate (hashcat_ctx, &device_param->hip_stream, hipStreamDefault) == -1) { device_param->skipped = true; continue; @@ -11591,13 +11223,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (device_param->is_hip == true) { - if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event1, HIP_EVENT_BLOCKING_SYNC) == -1) + if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event1, hipEventBlockingSync) == -1) { device_param->skipped = true; continue; } - if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event2, HIP_EVENT_BLOCKING_SYNC) == -1) + if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event2, hipEventBlockingSync) == -1) { device_param->skipped = true; continue; @@ -11785,13 +11417,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) const size_t dnclen_amp_mp = snprintf 
(device_name_chksum_amp_mp, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s-%u", backend_ctx->comptime, backend_ctx->cuda_driver_version, - backend_ctx->hip_driver_version, + backend_ctx->hip_driverVersion, device_param->is_opencl, device_param->opencl_platform_vendor_id, device_param->device_name, device_param->opencl_device_version, device_param->opencl_driver_version, - (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_preferred_wgs_multiple); + (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : ((device_param->kernel_preferred_wgs_multiple == 64) ? 64 : KERNEL_THREADS_MAX)); md5_ctx_t md5_ctx; @@ -12023,6 +11655,10 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) if (get_opencl_kernel_preferred_wgs_multiple (hashcat_ctx, device_param, device_param->opencl_kernel_bzero, &device_param->kernel_preferred_wgs_multiple_bzero) == -1) return -1; + // apple hack, but perhaps also an alternative for other vendors + + if (device_param->kernel_preferred_wgs_multiple == 0) device_param->kernel_preferred_wgs_multiple = device_param->kernel_preferred_wgs_multiple_bzero; + // GPU autotune init if (hc_clCreateKernel (hashcat_ctx, device_param->opencl_program_shared, "gpu_atinit", &device_param->opencl_kernel_atinit) == -1) return -1; @@ -12113,7 +11749,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) const size_t dnclen = snprintf (device_name_chksum, HCBUFSIZ_TINY, "%d-%d-%d-%d-%u-%s-%s-%s-%d-%u-%u-%u-%s", backend_ctx->comptime, backend_ctx->cuda_driver_version, - backend_ctx->hip_driver_version, + backend_ctx->hip_driverVersion, device_param->is_opencl, device_param->opencl_platform_vendor_id, device_param->device_name, @@ -12122,7 +11758,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->vector_width, hashconfig->kern_type, extra_value, - (user_options->kernel_threads_chgd == true) ? 
user_options->kernel_threads : device_param->kernel_preferred_wgs_multiple, + (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : ((device_param->kernel_preferred_wgs_multiple == 64) ? 64 : KERNEL_THREADS_MAX), build_options_module_buf); md5_ctx_t md5_ctx; @@ -14865,14 +14501,39 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) u32 kernel_accel_min = device_param->kernel_accel_min; u32 kernel_accel_max = device_param->kernel_accel_max; - /** - * We need a kernel accel limiter otherwise we will allocate too much memory (Example 4* GTX1080): - * 4 (gpus) * 260 (sizeof pw_t) * 3 (pws, pws_comp, pw_pre) * 20 (MCU) * 1024 (threads) * 1024 (accel) = 65,431,142,400 bytes RAM!! - */ + // We need to deal with the situation that the total video RAM > total host RAM. + // For the opposite direction, we do that in the loop section below. + // Especially in multi-GPU setups that is very likely. + // The buffers which actually take a lot of memory (except for SCRYPT) are the ones for the password candidates. + // They are stored in an aligned order for better performance, but this increases the memory pressure. + // The best way to keep these buffers to a reasonable size is by controlling the kernel_accel parameter. + // + // In theory this check could be disabled if we check if total video RAM < total host RAM, + // but at this point of initialization phase we don't have this information available. - const int max_gb = (hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE) ? 
1024 : 64; - const u32 accel_limit = CEILDIV ((max_gb * 1024), kernel_threads); // this should result in less than 4GB per GPU, but allow higher accel in case user reduces the threads manually using -T + const u64 SIZE_8GB = 8ULL * 1024 * 1024 * 1024; + + u64 accel_limit = SIZE_8GB; + + // this is device_processors * kernel_threads + + accel_limit /= device_param->hardware_power; + + // single password candidate size + + accel_limit /= sizeof (pw_t); + + // pws[], pws_comp[] and pw_pre[] are some large blocks with password candidates + + accel_limit /= 3; + + // It is possible that the GPU simply has too many hardware resources and 8GB per GPU is not enough, but OTOH we can't get lower than 1 + + accel_limit = MAX (accel_limit, 1); + + // I think vector size is not required because vector_size is dividing the pws_cnt in run_kernel() kernel_accel_max = MIN (kernel_accel_max, accel_limit); @@ -14884,7 +14545,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) return -1; } - // find out if we would request too much memory on memory blocks which are based on kernel_accel + // Opposite direction check: find out if we would request too much memory on memory blocks which are based on kernel_accel u64 size_pws = 4; u64 size_pws_amp = 4; @@ -14899,12 +14560,6 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) u64 size_brain_link_out = 4; #endif - // instead of a thread limit we can also use a memory limit. - // this value should represent a reasonable amount of memory a host system has per GPU. - // note we're allocating 3 blocks of that size. 
- - const u64 PWS_SPACE = 1024ULL * 1024ULL * 1024ULL; - while (kernel_accel_max >= kernel_accel_min) { const u64 kernel_power_max = device_param->hardware_power * kernel_accel_max; @@ -14954,8 +14609,6 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) int memory_limit_hit = 0; - if (size_pws > PWS_SPACE) memory_limit_hit = 1; - // sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate. // let's add some extra space just to be sure. // now depends on the kernel-accel value (where scrypt and similar benefits), but also hard minimum 64mb and maximum 1024mb limit diff --git a/src/brain.c b/src/brain.c index 2962eaa2e..a12a1375b 100644 --- a/src/brain.c +++ b/src/brain.c @@ -666,7 +666,7 @@ u32 brain_auth_challenge (void) #else - static const char *urandom = "/dev/urandom"; + static const char *const urandom = "/dev/urandom"; HCFILE fp; diff --git a/src/ext_ADL.c b/src/ext_ADL.c index e54420713..9676c916f 100644 --- a/src/ext_ADL.c +++ b/src/ext_ADL.c @@ -50,27 +50,26 @@ int adl_init (void *hashcat_ctx) return -1; } - HC_LOAD_FUNC(adl, ADL_Main_Control_Destroy, ADL_MAIN_CONTROL_DESTROY, ADL, 0); - HC_LOAD_FUNC(adl, ADL_Main_Control_Create, ADL_MAIN_CONTROL_CREATE, ADL, 0); - HC_LOAD_FUNC(adl, ADL_Adapter_NumberOfAdapters_Get, ADL_ADAPTER_NUMBEROFADAPTERS_GET, ADL, 0); + HC_LOAD_FUNC(adl, ADL_Adapter_Active_Get, ADL_ADAPTER_ACTIVE_GET, ADL, 0); HC_LOAD_FUNC(adl, ADL_Adapter_AdapterInfo_Get, ADL_ADAPTER_ADAPTERINFO_GET, ADL, 0); - HC_LOAD_FUNC(adl, ADL_Display_DisplayInfo_Get, ADL_DISPLAY_DISPLAYINFO_GET, ADL, 0); - HC_LOAD_FUNC(adl, ADL_Adapter_ID_Get, ADL_ADAPTER_ID_GET, ADL, 0); - HC_LOAD_FUNC(adl, ADL_Adapter_VideoBiosInfo_Get, ADL_ADAPTER_VIDEOBIOSINFO_GET, ADL, 0); - HC_LOAD_FUNC(adl, ADL_Overdrive5_ThermalDevices_Enum, ADL_OVERDRIVE5_THERMALDEVICES_ENUM, ADL, 0); - HC_LOAD_FUNC(adl, ADL_Overdrive5_Temperature_Get, ADL_OVERDRIVE5_TEMPERATURE_GET, ADL, 0); - HC_LOAD_FUNC(adl, 
ADL_Overdrive6_Temperature_Get, ADL_OVERDRIVE6_TEMPERATURE_GET, ADL, 0); + HC_LOAD_FUNC(adl, ADL_Adapter_NumberOfAdapters_Get, ADL_ADAPTER_NUMBEROFADAPTERS_GET, ADL, 0); + HC_LOAD_FUNC(adl, ADL_Main_Control_Create, ADL_MAIN_CONTROL_CREATE, ADL, 0); + HC_LOAD_FUNC(adl, ADL_Main_Control_Destroy, ADL_MAIN_CONTROL_DESTROY, ADL, 0); HC_LOAD_FUNC(adl, ADL_Overdrive5_CurrentActivity_Get, ADL_OVERDRIVE5_CURRENTACTIVITY_GET, ADL, 0); HC_LOAD_FUNC(adl, ADL_Overdrive5_FanSpeedInfo_Get, ADL_OVERDRIVE5_FANSPEEDINFO_GET, ADL, 0); HC_LOAD_FUNC(adl, ADL_Overdrive5_FanSpeed_Get, ADL_OVERDRIVE5_FANSPEED_GET, ADL, 0); - HC_LOAD_FUNC(adl, ADL_Overdrive6_FanSpeed_Get, ADL_OVERDRIVE6_FANSPEED_GET, ADL, 0); - HC_LOAD_FUNC(adl, ADL_Adapter_Active_Get, ADL_ADAPTER_ACTIVE_GET, ADL, 0); - HC_LOAD_FUNC(adl, ADL_Overdrive_Caps, ADL_OVERDRIVE_CAPS, ADL, 0); + HC_LOAD_FUNC(adl, ADL_Overdrive5_ODParameters_Get, ADL_OVERDRIVE5_ODPARAMETERS_GET, ADL, 0); + HC_LOAD_FUNC(adl, ADL_Overdrive5_ODPerformanceLevels_Get, ADL_OVERDRIVE5_ODPERFORMANCELEVELS_GET, ADL, 0); + HC_LOAD_FUNC(adl, ADL_Overdrive5_Temperature_Get, ADL_OVERDRIVE5_TEMPERATURE_GET, ADL, 0); + HC_LOAD_FUNC(adl, ADL_Overdrive5_ThermalDevices_Enum, ADL_OVERDRIVE5_THERMALDEVICES_ENUM, ADL, 0); HC_LOAD_FUNC(adl, ADL_Overdrive6_Capabilities_Get, ADL_OVERDRIVE6_CAPABILITIES_GET, ADL, 0); - HC_LOAD_FUNC(adl, ADL_Overdrive6_StateInfo_Get, ADL_OVERDRIVE6_STATEINFO_GET, ADL, 0); HC_LOAD_FUNC(adl, ADL_Overdrive6_CurrentStatus_Get, ADL_OVERDRIVE6_CURRENTSTATUS_GET, ADL, 0); - HC_LOAD_FUNC(adl, ADL_Overdrive6_TargetTemperatureData_Get, ADL_OVERDRIVE6_TARGETTEMPERATUREDATA_GET, ADL, 0); - HC_LOAD_FUNC(adl, ADL_Overdrive6_TargetTemperatureRangeInfo_Get, ADL_OVERDRIVE6_TARGETTEMPERATURERANGEINFO_GET, ADL, 0); + HC_LOAD_FUNC(adl, ADL_Overdrive6_FanSpeed_Get, ADL_OVERDRIVE6_FANSPEED_GET, ADL, 0); + HC_LOAD_FUNC(adl, ADL_Overdrive6_StateInfo_Get, ADL_OVERDRIVE6_STATEINFO_GET, ADL, 0); + HC_LOAD_FUNC(adl, ADL_Overdrive6_Temperature_Get, 
ADL_OVERDRIVE6_TEMPERATURE_GET, ADL, 0); + HC_LOAD_FUNC(adl, ADL_Overdrive_Caps, ADL_OVERDRIVE_CAPS, ADL, 0); + HC_LOAD_FUNC(adl, ADL2_Overdrive_Caps, ADL2_OVERDRIVE_CAPS, ADL, 1); + HC_LOAD_FUNC(adl, ADL2_New_QueryPMLogData_Get, ADL2_NEW_QUERYPMLOGDATA_GET, ADL, 1); return 0; } @@ -270,17 +269,42 @@ int hm_ADL_Overdrive_Caps (void *hashcat_ctx, int iAdapterIndex, int *od_support return 0; } -int hm_ADL_Overdrive6_TargetTemperatureData_Get (void *hashcat_ctx, int iAdapterIndex, int *cur_temp, int *default_temp) +int hm_ADL2_Overdrive_Caps (void *hashcat_ctx, int iAdapterIndex, int *iSupported, int *iEnabled, int *iVersion) { hwmon_ctx_t *hwmon_ctx = ((hashcat_ctx_t *) hashcat_ctx)->hwmon_ctx; ADL_PTR *adl = (ADL_PTR *) hwmon_ctx->hm_adl; - const int ADL_rc = adl->ADL_Overdrive6_TargetTemperatureData_Get (iAdapterIndex, cur_temp, default_temp); + // Not sure if that makes any sense... + + if (adl->ADL2_Overdrive_Caps == NULL) + { + return hm_ADL_Overdrive_Caps (hashcat_ctx, iAdapterIndex, iSupported, iEnabled, iVersion); + } + + const int ADL_rc = adl->ADL2_Overdrive_Caps (NULL, iAdapterIndex, iSupported, iEnabled, iVersion); if (ADL_rc != ADL_OK) { - event_log_error (hashcat_ctx, "ADL_Overdrive6_TargetTemperatureData_Get(): %d", ADL_rc); + event_log_error (hashcat_ctx, "ADL2_Overdrive_Caps(): %d", ADL_rc); + + return -1; + } + + return 0; +} + +int hm_ADL2_New_QueryPMLogData_Get (void *hashcat_ctx, int iAdapterIndex, ADLPMLogDataOutput *lpDataOutput) +{ + hwmon_ctx_t *hwmon_ctx = ((hashcat_ctx_t *) hashcat_ctx)->hwmon_ctx; + + ADL_PTR *adl = (ADL_PTR *) hwmon_ctx->hm_adl; + + const int ADL_rc = adl->ADL2_New_QueryPMLogData_Get (NULL, iAdapterIndex, lpDataOutput); + + if (ADL_rc != ADL_OK) + { + event_log_error (hashcat_ctx, "ADL2_New_QueryPMLogData_Get(): %d", ADL_rc); return -1; } diff --git a/src/filehandling.c b/src/filehandling.c index 54ba73369..9edd1cb35 100644 --- a/src/filehandling.c +++ b/src/filehandling.c @@ -21,7 +21,7 @@ int _wopen (const char *path, 
int oflag, ...) } #endif -bool hc_fopen (HCFILE *fp, const char *path, char *mode) +bool hc_fopen (HCFILE *fp, const char *path, const char *mode) { if (path == NULL || mode == NULL) return false; @@ -130,7 +130,7 @@ bool hc_fopen (HCFILE *fp, const char *path, char *mode) return true; } -bool hc_fopen_raw (HCFILE *fp, const char *path, char *mode) +bool hc_fopen_raw (HCFILE *fp, const char *path, const char *mode) { if (path == NULL || mode == NULL) return false; diff --git a/src/hlfmt.c b/src/hlfmt.c index 0e803445b..be71742e2 100644 --- a/src/hlfmt.c +++ b/src/hlfmt.c @@ -10,16 +10,16 @@ #include "hlfmt.h" #include "shared.h" -static const char *HLFMT_TEXT_HASHCAT = "native hashcat"; -static const char *HLFMT_TEXT_PWDUMP = "pwdump"; -static const char *HLFMT_TEXT_PASSWD = "passwd"; -static const char *HLFMT_TEXT_SHADOW = "shadow"; -static const char *HLFMT_TEXT_DCC = "DCC"; -static const char *HLFMT_TEXT_DCC2 = "DCC 2"; -static const char *HLFMT_TEXT_NETNTLM1 = "NetNTLMv1"; -static const char *HLFMT_TEXT_NETNTLM2 = "NetNTLMv2"; -static const char *HLFMT_TEXT_NSLDAP = "nsldap"; -static const char *HLFMT_TEXT_NSLDAPS = "nsldaps"; +static const char *const HLFMT_TEXT_HASHCAT = "native hashcat"; +static const char *const HLFMT_TEXT_PWDUMP = "pwdump"; +static const char *const HLFMT_TEXT_PASSWD = "passwd"; +static const char *const HLFMT_TEXT_SHADOW = "shadow"; +static const char *const HLFMT_TEXT_DCC = "DCC"; +static const char *const HLFMT_TEXT_DCC2 = "DCC 2"; +static const char *const HLFMT_TEXT_NETNTLM1 = "NetNTLMv1"; +static const char *const HLFMT_TEXT_NETNTLM2 = "NetNTLMv2"; +static const char *const HLFMT_TEXT_NSLDAP = "nsldap"; +static const char *const HLFMT_TEXT_NSLDAPS = "nsldaps"; // hlfmt hashcat diff --git a/src/hwmon.c b/src/hwmon.c index 374056d07..4e6a4f986 100644 --- a/src/hwmon.c +++ b/src/hwmon.c @@ -109,19 +109,7 @@ int hm_get_threshold_slowdown_with_devices_idx (hashcat_ctx_t *hashcat_ctx, cons } else if 
(hwmon_ctx->hm_device[backend_device_idx].od_version == 6) { - int CurrentValue = 0; - int DefaultValue = 0; - if (hm_ADL_Overdrive6_TargetTemperatureData_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &CurrentValue, &DefaultValue) == -1) - { - hwmon_ctx->hm_device[backend_device_idx].threshold_slowdown_get_supported = false; - - return -1; - } - - // the return value has never been tested since hm_ADL_Overdrive6_TargetTemperatureData_Get() never worked on any system. expect problems. - - return DefaultValue; } } } @@ -346,6 +334,22 @@ int hm_get_temperature_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int b return Temperature / 1000; } + + if (hwmon_ctx->hm_device[backend_device_idx].od_version == 8) + { + ADLPMLogDataOutput odlpDataOutput; + + memset (&odlpDataOutput, 0, sizeof (ADLPMLogDataOutput)); + + if (hm_ADL2_New_QueryPMLogData_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &odlpDataOutput) == -1) + { + hwmon_ctx->hm_device[backend_device_idx].temperature_get_supported = false; + + return -1; + } + + return odlpDataOutput.sensors[PMLOG_TEMPERATURE_EDGE].value; + } } if (hwmon_ctx->hm_sysfs_amdgpu) @@ -431,8 +435,37 @@ int hm_get_fanpolicy_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int bac if (hwmon_ctx->hm_device[backend_device_idx].od_version == 6) { + ADLOD6FanSpeedInfo lpFanSpeedInfo; + + memset (&lpFanSpeedInfo, 0, sizeof (lpFanSpeedInfo)); + + if (hm_ADL_Overdrive6_FanSpeed_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &lpFanSpeedInfo) == -1) + { + hwmon_ctx->hm_device[backend_device_idx].fanpolicy_get_supported = false; + hwmon_ctx->hm_device[backend_device_idx].fanspeed_get_supported = false; + + return -1; + } + return 1; } + + if (hwmon_ctx->hm_device[backend_device_idx].od_version == 8) + { + ADLPMLogDataOutput odlpDataOutput; + + memset (&odlpDataOutput, 0, sizeof (ADLPMLogDataOutput)); + + if (hm_ADL2_New_QueryPMLogData_Get (hashcat_ctx, 
hwmon_ctx->hm_device[backend_device_idx].adl, &odlpDataOutput) == -1) + { + hwmon_ctx->hm_device[backend_device_idx].fanpolicy_get_supported = false; + hwmon_ctx->hm_device[backend_device_idx].fanspeed_get_supported = false; + + return -1; + } + + return odlpDataOutput.sensors[PMLOG_FAN_PERCENTAGE].supported; + } } if (hwmon_ctx->hm_sysfs_amdgpu) @@ -542,6 +575,22 @@ int hm_get_fanspeed_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int back return faninfo.iFanSpeedPercent; } + + if (hwmon_ctx->hm_device[backend_device_idx].od_version == 8) + { + ADLPMLogDataOutput odlpDataOutput; + + memset (&odlpDataOutput, 0, sizeof (ADLPMLogDataOutput)); + + if (hm_ADL2_New_QueryPMLogData_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &odlpDataOutput) == -1) + { + hwmon_ctx->hm_device[backend_device_idx].fanspeed_get_supported = false; + + return -1; + } + + return odlpDataOutput.sensors[PMLOG_FAN_PERCENTAGE].value; + } } if (hwmon_ctx->hm_sysfs_amdgpu) @@ -617,18 +666,37 @@ int hm_get_buslanes_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int back { if (hwmon_ctx->hm_adl) { - ADLPMActivity PMActivity; - - PMActivity.iSize = sizeof (ADLPMActivity); - - if (hm_ADL_Overdrive_CurrentActivity_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &PMActivity) == -1) + if (hwmon_ctx->hm_device[backend_device_idx].od_version == 5) { - hwmon_ctx->hm_device[backend_device_idx].buslanes_get_supported = false; + ADLPMActivity PMActivity; - return -1; + PMActivity.iSize = sizeof (ADLPMActivity); + + if (hm_ADL_Overdrive_CurrentActivity_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &PMActivity) == -1) + { + hwmon_ctx->hm_device[backend_device_idx].buslanes_get_supported = false; + + return -1; + } + + return PMActivity.iCurrentBusLanes; } - return PMActivity.iCurrentBusLanes; + if (hwmon_ctx->hm_device[backend_device_idx].od_version == 8) + { + ADLPMLogDataOutput odlpDataOutput; + + memset (&odlpDataOutput, 0, sizeof 
(ADLPMLogDataOutput)); + + if (hm_ADL2_New_QueryPMLogData_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &odlpDataOutput) == -1) + { + hwmon_ctx->hm_device[backend_device_idx].buslanes_get_supported = false; + + return -1; + } + + return odlpDataOutput.sensors[PMLOG_BUS_LANES].value; + } } if (hwmon_ctx->hm_sysfs_amdgpu) @@ -704,18 +772,37 @@ int hm_get_utilization_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int b { if (hwmon_ctx->hm_adl) { - ADLPMActivity PMActivity; - - PMActivity.iSize = sizeof (ADLPMActivity); - - if (hm_ADL_Overdrive_CurrentActivity_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &PMActivity) == -1) + if (hwmon_ctx->hm_device[backend_device_idx].od_version == 5) { - hwmon_ctx->hm_device[backend_device_idx].utilization_get_supported = false; + ADLPMActivity PMActivity; - return -1; + PMActivity.iSize = sizeof (ADLPMActivity); + + if (hm_ADL_Overdrive_CurrentActivity_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &PMActivity) == -1) + { + hwmon_ctx->hm_device[backend_device_idx].utilization_get_supported = false; + + return -1; + } + + return PMActivity.iActivityPercent; } - return PMActivity.iActivityPercent; + if (hwmon_ctx->hm_device[backend_device_idx].od_version == 8) + { + ADLPMLogDataOutput odlpDataOutput; + + memset (&odlpDataOutput, 0, sizeof (ADLPMLogDataOutput)); + + if (hm_ADL2_New_QueryPMLogData_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &odlpDataOutput) == -1) + { + hwmon_ctx->hm_device[backend_device_idx].utilization_get_supported = false; + + return -1; + } + + return odlpDataOutput.sensors[PMLOG_INFO_ACTIVITY_GFX].value; + } } if (hwmon_ctx->hm_sysfs_amdgpu) @@ -808,18 +895,37 @@ int hm_get_memoryspeed_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int b { if (hwmon_ctx->hm_adl) { - ADLPMActivity PMActivity; - - PMActivity.iSize = sizeof (ADLPMActivity); - - if (hm_ADL_Overdrive_CurrentActivity_Get (hashcat_ctx, 
hwmon_ctx->hm_device[backend_device_idx].adl, &PMActivity) == -1) + if (hwmon_ctx->hm_device[backend_device_idx].od_version == 5) { - hwmon_ctx->hm_device[backend_device_idx].memoryspeed_get_supported = false; + ADLPMActivity PMActivity; - return -1; + PMActivity.iSize = sizeof (ADLPMActivity); + + if (hm_ADL_Overdrive_CurrentActivity_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &PMActivity) == -1) + { + hwmon_ctx->hm_device[backend_device_idx].memoryspeed_get_supported = false; + + return -1; + } + + return PMActivity.iMemoryClock / 100; } - return PMActivity.iMemoryClock / 100; + if (hwmon_ctx->hm_device[backend_device_idx].od_version == 8) + { + ADLPMLogDataOutput odlpDataOutput; + + memset (&odlpDataOutput, 0, sizeof (ADLPMLogDataOutput)); + + if (hm_ADL2_New_QueryPMLogData_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &odlpDataOutput) == -1) + { + hwmon_ctx->hm_device[backend_device_idx].memoryspeed_get_supported = false; + + return -1; + } + + return odlpDataOutput.sensors[PMLOG_CLK_MEMCLK].value; + } } if (hwmon_ctx->hm_sysfs_amdgpu) @@ -895,18 +1001,37 @@ int hm_get_corespeed_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int bac { if (hwmon_ctx->hm_adl) { - ADLPMActivity PMActivity; - - PMActivity.iSize = sizeof (ADLPMActivity); - - if (hm_ADL_Overdrive_CurrentActivity_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &PMActivity) == -1) + if (hwmon_ctx->hm_device[backend_device_idx].od_version == 5) { - hwmon_ctx->hm_device[backend_device_idx].corespeed_get_supported = false; + ADLPMActivity PMActivity; - return -1; + PMActivity.iSize = sizeof (ADLPMActivity); + + if (hm_ADL_Overdrive_CurrentActivity_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &PMActivity) == -1) + { + hwmon_ctx->hm_device[backend_device_idx].corespeed_get_supported = false; + + return -1; + } + + return PMActivity.iEngineClock / 100; } - return PMActivity.iEngineClock / 100; + if 
(hwmon_ctx->hm_device[backend_device_idx].od_version == 8) + { + ADLPMLogDataOutput odlpDataOutput; + + memset (&odlpDataOutput, 0, sizeof (ADLPMLogDataOutput)); + + if (hm_ADL2_New_QueryPMLogData_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &odlpDataOutput) == -1) + { + hwmon_ctx->hm_device[backend_device_idx].corespeed_get_supported = false; + + return -1; + } + + return odlpDataOutput.sensors[PMLOG_CLK_GFXCLK].value; + } } if (hwmon_ctx->hm_sysfs_amdgpu) @@ -1400,7 +1525,9 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx) int od_enabled = 0; int od_version = 0; - hm_ADL_Overdrive_Caps (hashcat_ctx, lpAdapterInfo[i].iAdapterIndex, &od_supported, &od_enabled, &od_version); + hm_ADL2_Overdrive_Caps (hashcat_ctx, lpAdapterInfo[i].iAdapterIndex, &od_supported, &od_enabled, &od_version); + + if (od_version < 8) od_version = 5; hm_adapters_adl[device_id].od_version = od_version; @@ -1534,12 +1661,6 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx) hwmon_ctx->enabled = true; - /** - * save buffer required for later restores - */ - - hwmon_ctx->od_clock_mem_status = (ADLOD6MemClockState *) hccalloc (backend_ctx->backend_devices_cnt, sizeof (ADLOD6MemClockState)); - /** * HM devices: copy */ @@ -1795,8 +1916,6 @@ void hwmon_ctx_destroy (hashcat_ctx_t *hashcat_ctx) // free memory - hcfree (hwmon_ctx->od_clock_mem_status); - hcfree (hwmon_ctx->hm_device); memset (hwmon_ctx, 0, sizeof (hwmon_ctx_t)); diff --git a/src/modules/module_01500.c b/src/modules/module_01500.c index ea01dab96..dc7b7b47e 100644 --- a/src/modules/module_01500.c +++ b/src/modules/module_01500.c @@ -184,7 +184,11 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY { if ((user_options->attack_mode == ATTACK_MODE_BF) && (hashes->salts_cnt == 1) && (user_options->slow_candidates == false)) { - hc_asprintf (&jit_build_options, "-DDESCRYPT_SALT=%u -D _unroll", hashes->salts_buf[0].salt_buf[0] & 0xfff); + hc_asprintf (&jit_build_options, "-DDESCRYPT_SALT=%u 
-D _unroll -fno-experimental-new-pass-manager", hashes->salts_buf[0].salt_buf[0] & 0xfff); + } + else + { + hc_asprintf (&jit_build_options, "-D _unroll -fno-experimental-new-pass-manager"); } } else diff --git a/src/modules/module_07500.c b/src/modules/module_07500.c index 1681fb4a8..7cb6e04e4 100644 --- a/src/modules/module_07500.c +++ b/src/modules/module_07500.c @@ -65,35 +65,13 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY } else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) + if (device_param->device_local_mem_size < 49152) { - native_threads = 8; - } - else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) - { - if (device_param->device_local_mem_size < 49152) - { - native_threads = 32; - } - else - { - native_threads = 64; - } - } - else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) - { - if (device_param->device_local_mem_size < 49152) - { - native_threads = 32; - } - else - { - native_threads = 64; - } + native_threads = MIN (device_param->kernel_preferred_wgs_multiple, 32); // We can't just set 32, because Intel GPU need 8 } else { - native_threads = 32; + native_threads = device_param->kernel_preferred_wgs_multiple; } } diff --git a/src/modules/module_09000.c b/src/modules/module_09000.c index 3e3158c0a..28963a1fc 100644 --- a/src/modules/module_09000.c +++ b/src/modules/module_09000.c @@ -22,7 +22,8 @@ static const u64 KERN_TYPE = 9000; static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE; static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE | OPTS_TYPE_BINARY_HASHFILE - | OPTS_TYPE_AUTODETECT_DISABLE; + | OPTS_TYPE_AUTODETECT_DISABLE + | OPTS_TYPE_DYNAMIC_SHARED; static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; static const char *ST_PASS = "hashcat"; static const char *ST_HASH = 
"0a3f352686e5eb5be173e668a4fff5cd5df420927e1da2d5d4052340160637e3e6a5a92841a188ed240e13b919f3d91694bd4c0acba79271e9c08a83ea5ad387cbb74d5884066a1cb5a8caa80d847079168f84823847c631dbe3a834f1bc496acfebac3bff1608bf1c857717f8f428e07b5e2cb12aaeddfa83d7dcb6d840234d08b84f8ca6c6e562af73eea13148f7902bcaf0220d3e36eeeff1d37283dc421483a2791182614ebb"; @@ -75,16 +76,25 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY { char *jit_build_options = NULL; + // this mode heavily depends on the available shared memory size + // note the kernel need to have some special code changes in order to make use to use post-48k memory region + // we need to set some macros + + bool use_dynamic = false; + + if (device_param->is_cuda == true) + { + use_dynamic = true; + } + // this uses some nice feedback effect. // based on the device_local_mem_size the reqd_work_group_size in the kernel is set to some value // which is then is read from the opencl host in the kernel_preferred_wgs_multiple1/2/3 result. // therefore we do not need to set module_kernel_threads_min/max except for CPU, where the threads are set to fixed 1. 
- u32 fixed_local_size = 0; - if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) { - fixed_local_size = 1; + hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", 1); } else { @@ -100,29 +110,58 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY if (device_param->is_opencl == true) { - overhead = 4; + overhead = 1; } } if (user_options->kernel_threads_chgd == true) { - fixed_local_size = user_options->kernel_threads; + u32 fixed_local_size = user_options->kernel_threads; - // otherwise out-of-bound reads - - if ((fixed_local_size * 4096) > (device_param->device_local_mem_size - overhead)) + if (use_dynamic == true) { - fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096; + if ((fixed_local_size * 4096) > device_param->kernel_dynamic_local_mem_size_memset) + { + // otherwise out-of-bound reads + + fixed_local_size = device_param->kernel_dynamic_local_mem_size_memset / 4096; + } + + hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D DYNAMIC_LOCAL", fixed_local_size); + } + else + { + if ((fixed_local_size * 4096) > (device_param->device_local_mem_size - overhead)) + { + // otherwise out-of-bound reads + + fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096; + } + + hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", fixed_local_size); } } else { - fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096; + if (use_dynamic == true) + { + // using kernel_dynamic_local_mem_size_memset is a bit hackish. + // we had to brute-force this value out of an already loaded CUDA function. + // there's no official way to query for this value. 
+ + const u32 fixed_local_size = device_param->kernel_dynamic_local_mem_size_memset / 4096; + + hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D DYNAMIC_LOCAL", fixed_local_size); + } + else + { + const u32 fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096; + + hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", fixed_local_size); + } } } - hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", fixed_local_size); - return jit_build_options; } diff --git a/src/modules/module_09700.c b/src/modules/module_09700.c index cad911186..626f65f8f 100644 --- a/src/modules/module_09700.c +++ b/src/modules/module_09700.c @@ -69,21 +69,13 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY } else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) + if (device_param->device_local_mem_size < 49152) { - native_threads = 8; - } - else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) - { - native_threads = 64; - } - else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) - { - native_threads = 64; + native_threads = MIN (device_param->kernel_preferred_wgs_multiple, 32); // We can't just set 32, because Intel GPU need 8 } else { - native_threads = 32; + native_threads = device_param->kernel_preferred_wgs_multiple; } } diff --git a/src/modules/module_09710.c b/src/modules/module_09710.c index 033f77ea0..aa63f6905 100644 --- a/src/modules/module_09710.c +++ b/src/modules/module_09710.c @@ -69,21 +69,13 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY } else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) + if (device_param->device_local_mem_size < 49152) { - native_threads = 8; - } - else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) - { - native_threads = 64; - } - else if 
(device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) - { - native_threads = 64; + native_threads = MIN (device_param->kernel_preferred_wgs_multiple, 32); // We can't just set 32, because Intel GPU need 8 } else { - native_threads = 32; + native_threads = device_param->kernel_preferred_wgs_multiple; } } diff --git a/src/modules/module_09720.c b/src/modules/module_09720.c index 04e99201f..e4be9f8b0 100644 --- a/src/modules/module_09720.c +++ b/src/modules/module_09720.c @@ -70,21 +70,13 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY } else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) + if (device_param->device_local_mem_size < 49152) { - native_threads = 8; - } - else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) - { - native_threads = 64; - } - else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) - { - native_threads = 64; + native_threads = MIN (device_param->kernel_preferred_wgs_multiple, 32); // We can't just set 32, because Intel GPU need 8 } else { - native_threads = 32; + native_threads = device_param->kernel_preferred_wgs_multiple; } } diff --git a/src/modules/module_09800.c b/src/modules/module_09800.c index 2eb7fab05..357ecb80b 100644 --- a/src/modules/module_09800.c +++ b/src/modules/module_09800.c @@ -71,21 +71,13 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY } else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) + if (device_param->device_local_mem_size < 49152) { - native_threads = 8; - } - else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) - { - native_threads = 64; - } - else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) - { - native_threads = 64; + native_threads = MIN (device_param->kernel_preferred_wgs_multiple, 32); // We can't 
just set 32, because Intel GPU need 8 } else { - native_threads = 32; + native_threads = device_param->kernel_preferred_wgs_multiple; } } diff --git a/src/modules/module_09810.c b/src/modules/module_09810.c index 2a1074b2c..9d3edd8d2 100644 --- a/src/modules/module_09810.c +++ b/src/modules/module_09810.c @@ -70,21 +70,13 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY } else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) + if (device_param->device_local_mem_size < 49152) { - native_threads = 8; - } - else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) - { - native_threads = 64; - } - else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) - { - native_threads = 64; + native_threads = MIN (device_param->kernel_preferred_wgs_multiple, 32); // We can't just set 32, because Intel GPU need 8 } else { - native_threads = 32; + native_threads = device_param->kernel_preferred_wgs_multiple; } } diff --git a/src/modules/module_09820.c b/src/modules/module_09820.c index ea3dfe22b..9997992e4 100644 --- a/src/modules/module_09820.c +++ b/src/modules/module_09820.c @@ -72,21 +72,13 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY } else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) + if (device_param->device_local_mem_size < 49152) { - native_threads = 8; - } - else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) - { - native_threads = 64; - } - else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) - { - native_threads = 64; + native_threads = MIN (device_param->kernel_preferred_wgs_multiple, 32); // We can't just set 32, because Intel GPU need 8 } else { - native_threads = 32; + native_threads = device_param->kernel_preferred_wgs_multiple; } } diff --git a/src/modules/module_10400.c 
b/src/modules/module_10400.c index 77416f5ce..30c95e68a 100644 --- a/src/modules/module_10400.c +++ b/src/modules/module_10400.c @@ -76,21 +76,13 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY } else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) + if (device_param->device_local_mem_size < 49152) { - native_threads = 8; - } - else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) - { - native_threads = 64; - } - else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) - { - native_threads = 64; + native_threads = MIN (device_param->kernel_preferred_wgs_multiple, 32); // We can't just set 32, because Intel GPU need 8 } else { - native_threads = 32; + native_threads = device_param->kernel_preferred_wgs_multiple; } } diff --git a/src/modules/module_10410.c b/src/modules/module_10410.c index b2c98363f..25be1feb4 100644 --- a/src/modules/module_10410.c +++ b/src/modules/module_10410.c @@ -77,21 +77,13 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY } else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) + if (device_param->device_local_mem_size < 49152) { - native_threads = 8; - } - else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) - { - native_threads = 64; - } - else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) - { - native_threads = 64; + native_threads = MIN (device_param->kernel_preferred_wgs_multiple, 32); // We can't just set 32, because Intel GPU need 8 } else { - native_threads = 32; + native_threads = device_param->kernel_preferred_wgs_multiple; } } diff --git a/src/modules/module_10420.c b/src/modules/module_10420.c index 23e537bf5..eecf97f2c 100644 --- a/src/modules/module_10420.c +++ b/src/modules/module_10420.c @@ -76,21 +76,13 @@ char *module_jit_build_options 
(MAYBE_UNUSED const hashconfig_t *hashconfig, MAY } else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) + if (device_param->device_local_mem_size < 49152) { - native_threads = 8; - } - else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) - { - native_threads = 64; - } - else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) - { - native_threads = 64; + native_threads = MIN (device_param->kernel_preferred_wgs_multiple, 32); // We can't just set 32, because Intel GPU need 8 } else { - native_threads = 32; + native_threads = device_param->kernel_preferred_wgs_multiple; } } diff --git a/src/modules/module_10500.c b/src/modules/module_10500.c index 80a8478ef..d261e4458 100644 --- a/src/modules/module_10500.c +++ b/src/modules/module_10500.c @@ -100,21 +100,13 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY } else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) + if (device_param->device_local_mem_size < 49152) { - native_threads = 8; - } - else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) - { - native_threads = 64; - } - else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) - { - native_threads = 64; + native_threads = MIN (device_param->kernel_preferred_wgs_multiple, 32); // We can't just set 32, because Intel GPU need 8 } else { - native_threads = 32; + native_threads = device_param->kernel_preferred_wgs_multiple; } } diff --git a/src/modules/module_13100.c b/src/modules/module_13100.c index fac5cb24c..2b4e0e846 100644 --- a/src/modules/module_13100.c +++ b/src/modules/module_13100.c @@ -64,35 +64,13 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY } else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - if (device_param->opencl_device_vendor_id == 
VENDOR_ID_INTEL_SDK) + if (device_param->device_local_mem_size < 49152) { - native_threads = 8; - } - else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) - { - if (device_param->device_local_mem_size < 49152) - { - native_threads = 32; - } - else - { - native_threads = 64; - } - } - else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) - { - if (device_param->device_local_mem_size < 49152) - { - native_threads = 32; - } - else - { - native_threads = 64; - } + native_threads = MIN (device_param->kernel_preferred_wgs_multiple, 32); // We can't just set 32, because Intel GPU need 8 } else { - native_threads = 32; + native_threads = device_param->kernel_preferred_wgs_multiple; } } diff --git a/src/modules/module_17200.c b/src/modules/module_17200.c index 44798110d..4577fb5f5 100644 --- a/src/modules/module_17200.c +++ b/src/modules/module_17200.c @@ -102,7 +102,7 @@ static const u32 HASH_CATEGORY = HASH_CATEGORY_ARCHIVE; static const char *HASH_NAME = "PKZIP (Compressed)"; static const u64 KERN_TYPE = 17200; static const u32 OPTI_TYPE = 0; -static const u64 OPTS_TYPE = 0; +static const u64 OPTS_TYPE = OPTS_TYPE_NATIVE_THREADS; static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; static const char *ST_PASS = "hashcat"; static const char *ST_HASH = "$pkzip2$1*1*2*0*e3*1c5*eda7a8de*0*28*8*e3*eda7*5096*a9fc1f4e951c8fb3031a6f903e5f4e3211c8fdc4671547bf77f6f682afbfcc7475d83898985621a7af9bccd1349d1976500a68c48f630b7f22d7a0955524d768e34868880461335417ddd149c65a917c0eb0a4bf7224e24a1e04cf4ace5eef52205f4452e66ded937db9545f843a68b1e84a2e933cc05fb36d3db90e6c5faf1bee2249fdd06a7307849902a8bb24ec7e8a0886a4544ca47979a9dfeefe034bdfc5bd593904cfe9a5309dd199d337d3183f307c2cb39622549a5b9b8b485b7949a4803f63f67ca427a0640ad3793a519b2476c52198488e3e2e04cac202d624fb7d13c2*$/pkzip2$"; @@ -170,11 +170,6 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // it leads to CL_KERNEL_WORK_GROUP_SIZE to return 0 and later we will divide with 0 
// workaround would be to rewrite kernel to use global memory - if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) - { - return true; - } - if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) { return true; diff --git a/src/modules/module_17220.c b/src/modules/module_17220.c index 9028040d9..5ad6f6050 100644 --- a/src/modules/module_17220.c +++ b/src/modules/module_17220.c @@ -102,7 +102,7 @@ static const u32 HASH_CATEGORY = HASH_CATEGORY_ARCHIVE; static const char *HASH_NAME = "PKZIP (Compressed Multi-File)"; static const u64 KERN_TYPE = 17220; static const u32 OPTI_TYPE = 0; -static const u64 OPTS_TYPE = 0; +static const u64 OPTS_TYPE = OPTS_TYPE_NATIVE_THREADS; static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; static const char *ST_PASS = "hashcat"; static const char *ST_HASH = "$pkzip2$3*1*1*0*8*24*a425*8827*d1730095cd829e245df04ebba6c52c0573d49d3bbeab6cb385b7fa8a28dcccd3098bfdd7*1*0*8*24*2a74*882a*51281ac874a60baedc375ca645888d29780e20d4076edd1e7154a99bde982152a736311f*2*0*e3*1c5*eda7a8de*0*29*8*e3*eda7*5096*1455781b59707f5151139e018bdcfeebfc89bc37e372883a7ec0670a5eafc622feb338f9b021b6601a674094898a91beac70e41e675f77702834ca6156111a1bf7361bc9f3715d77dfcdd626634c68354c6f2e5e0a7b1e1ce84a44e632d0f6e36019feeab92fb7eac9dda8df436e287aafece95d042059a1b27d533c5eab62c1c559af220dc432f2eb1a38a70f29e8f3cb5a207704274d1e305d7402180fd47e026522792f5113c52a116d5bb25b67074ffd6f4926b221555234aabddc69775335d592d5c7d22462b75de1259e8342a9ba71cb06223d13c7f51f13be2ad76352c3b8ed*$/pkzip2$"; @@ -170,11 +170,6 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // it leads to CL_KERNEL_WORK_GROUP_SIZE to return 0 and later we will divide with 0 // workaround would be to rewrite kernel to use global memory - if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) - { - return true; - } - if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) { return true; diff --git a/src/modules/module_17225.c 
b/src/modules/module_17225.c index 75c376c9e..65128552c 100644 --- a/src/modules/module_17225.c +++ b/src/modules/module_17225.c @@ -102,7 +102,7 @@ static const u32 HASH_CATEGORY = HASH_CATEGORY_ARCHIVE; static const char *HASH_NAME = "PKZIP (Mixed Multi-File)"; static const u64 KERN_TYPE = 17225; static const u32 OPTI_TYPE = 0; -static const u64 OPTS_TYPE = 0; +static const u64 OPTS_TYPE = OPTS_TYPE_NATIVE_THREADS; static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; static const char *ST_PASS = "hashcat"; static const char *ST_HASH = "$pkzip2$3*1*1*0*0*24*3e2c*3ef8*0619e9d17ff3f994065b99b1fa8aef41c056edf9fa4540919c109742dcb32f797fc90ce0*1*0*8*24*431a*3f26*18e2461c0dbad89bd9cc763067a020c89b5e16195b1ac5fa7fb13bd246d000b6833a2988*2*0*23*17*1e3c1a16*2e4*2f*0*23*1e3c*3f2d*54ea4dbc711026561485bbd191bf300ae24fa0997f3779b688cdad323985f8d3bb8b0c*$/pkzip2$"; @@ -170,11 +170,6 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE // it leads to CL_KERNEL_WORK_GROUP_SIZE to return 0 and later we will divide with 0 // workaround would be to rewrite kernel to use global memory - if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) - { - return true; - } - if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) { return true; diff --git a/src/modules/module_18200.c b/src/modules/module_18200.c index e6596306b..f4d32695e 100644 --- a/src/modules/module_18200.c +++ b/src/modules/module_18200.c @@ -64,35 +64,13 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY } else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) + if (device_param->device_local_mem_size < 49152) { - native_threads = 8; - } - else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) - { - if (device_param->device_local_mem_size < 49152) - { - native_threads = 32; - } - else - { - native_threads = 64; - } - } - else if 
(device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) - { - if (device_param->device_local_mem_size < 49152) - { - native_threads = 32; - } - else - { - native_threads = 64; - } + native_threads = MIN (device_param->kernel_preferred_wgs_multiple, 32); // We can't just set 32, because Intel GPU need 8 } else { - native_threads = 32; + native_threads = device_param->kernel_preferred_wgs_multiple; } } diff --git a/src/modules/module_18600.c b/src/modules/module_18600.c index c24ea37fe..8f2d05f4c 100644 --- a/src/modules/module_18600.c +++ b/src/modules/module_18600.c @@ -21,7 +21,8 @@ static const char *HASH_NAME = "Open Document Format (ODF) 1.1 (SHA-1, Blow static const u64 KERN_TYPE = 18600; static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_SLOW_HASH_SIMD_LOOP; -static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE; +static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE + | OPTS_TYPE_DYNAMIC_SHARED; static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; static const char *ST_PASS = "hashcat"; static const char *ST_HASH = 
"$odf$*0*0*1024*16*bff753835f4ea15644b8a2f8e4b5be3d147b9576*8*ee371da34333b69d*16*a902eff54a4d782a26a899a31f97bef4*0*dae7e41fbc3a500d3ce152edd8876c4f38fb17d673ee2ac44ef1e0e283622cd2ae298a82d8d98f2ea737247881fc353e73a2f535c6e13e0cdc60821c1a61c53a4b0c46ff3a3b355d7b793fad50de15999fc7c1194321d1c54316c3806956c4a3ade7daabb912a2a36398eba883af088b3cb69b43365d9ba9fce3fb0c1524f73947a7e9fc1bf3adb5f85a367035feacb5d97c578b037144c2793f34aa09dcd04bdaa455aee0d4c52fe377248611dd56f2bd4eb294673525db905f5d905a28dec0909348e6bf94bcebf03ddd61a48797cd5728ce6dbb71037b268f526e806401abcf495f6edd0b5d87118671ec690d4627f86a43e51c7f6d42a75a56eec51204d47e115e813ed4425c97b16b195e02ce776c185194b9de43ae89f356e29face016cb393d6fb93af8ea305d921d5592dd184051ac790b9b90266f52b8d53ce1cb1d762942d6d5bbd0e3821be21af9fa6874ba0c60e64f41d3e5b6caca1c53b575afdc5d8f6a3edbf874dbe009c6cb296466fe9637aed4aed8a43a95ea7d26b4090ad33d4ee7a83844b0893e8bc0f04944205fb9576cb5720f019028cd75ca9ac47b3e5fa231354d74135564df43b659cfaea7e195c4a896e0e0e0c85dc9ce3a9ce9ba552bc2a6dbac4901c19558818e1957ed72d78662bb5ba53475ca584371f1825ae0c92322a4404e63c2baad92665aac29b5c6f96e1e6338d48fb0aef4d0b686063974f58b839484f8dcf0a02537cba67a7d2c4de13125d74820cb07ec72782035af1ea6c4db61c77016d1c021b63c8b07adb4e8510f5c41bbc501f60f3dd16462399b52eb146787e38e700147c7aa23ac4d5d22d9d1c93e67a01c92a197d4765cbf8d56a862a1205abb450a182913a69b8d5334a59924f86fb3ccd0dcfe7426053e26ba26b57c05f38d85863fff1f81135b0366e8cd8680663ae8aaf7d005317b849d5e08be882708fa0d8d02d47e89150124b507c34845c922b95e62aa0b3fef218773d7aeb572c67b35ad8787f31ecc6e1846b673b8ba6172223176eabf0020b6aa3aa71405b40b2fc2127bf9741a103f1d8eca21bf27328cdf15153f2f223eff7b831a72ed8ecacf4ea8df4ea44f3a3921e5a88fb2cfa355ece0f05cbc88fdd1ecd368d6e3b2dfabd999e5b708f1bccaeebb296c9d7b76659967742fe966aa6871cbbffe710b0cd838c6e02e6eb608cb5c81d066b60b5b3604396331d97d4a2c4c2317406e48c9f5387a2c72511d1e6899bd450e9ca88d535755bcfddb53a6df118cd9cdc7d8b4b814f7bc17684d8e5975defaa25d06f410ed0724c16b8f69ec3869bc1f05c71483666968d
1c04509875dadd72c6182733d564eb1a7d555dc34f6b817c5418626214d0b2c3901c5a46f5b20fddfdf9f71a7dfd75b9928778a3f65e1832dff22be973c2b259744d500a3027c2a2e08972eaaad4c5c4ec871"; @@ -66,16 +67,25 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY { char *jit_build_options = NULL; + // this mode heavily depends on the available shared memory size + // note the kernel need to have some special code changes in order to make use to use post-48k memory region + // we need to set some macros + + bool use_dynamic = false; + + if (device_param->is_cuda == true) + { + use_dynamic = true; + } + // this uses some nice feedback effect. // based on the device_local_mem_size the reqd_work_group_size in the kernel is set to some value // which is then is read from the opencl host in the kernel_preferred_wgs_multiple1/2/3 result. // therefore we do not need to set module_kernel_threads_min/max except for CPU, where the threads are set to fixed 1. - u32 fixed_local_size = 0; - if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) { - fixed_local_size = 1; + hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE_COMP=%u", 1); } else { @@ -91,29 +101,58 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY if (device_param->is_opencl == true) { - overhead = 4; + overhead = 1; } } if (user_options->kernel_threads_chgd == true) { - fixed_local_size = user_options->kernel_threads; + u32 fixed_local_size = user_options->kernel_threads; - // otherwise out-of-bound reads - - if ((fixed_local_size * 4096) > (device_param->device_local_mem_size - overhead)) + if (use_dynamic == true) { - fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096; + if ((fixed_local_size * 4096) > device_param->kernel_dynamic_local_mem_size_memset) + { + // otherwise out-of-bound reads + + fixed_local_size = device_param->kernel_dynamic_local_mem_size_memset / 4096; + } + + hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE_COMP=%u -D 
DYNAMIC_LOCAL", fixed_local_size); + } + else + { + if ((fixed_local_size * 4096) > (device_param->device_local_mem_size - overhead)) + { + // otherwise out-of-bound reads + + fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096; + } + + hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE_COMP=%u", fixed_local_size); } } else { - fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096; + if (use_dynamic == true) + { + // using kernel_dynamic_local_mem_size_memset is a bit hackish. + // we had to brute-force this value out of an already loaded CUDA function. + // there's no official way to query for this value. + + const u32 fixed_local_size = device_param->kernel_dynamic_local_mem_size_memset / 4096; + + hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE_COMP=%u -D DYNAMIC_LOCAL", fixed_local_size); + } + else + { + const u32 fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096; + + hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE_COMP=%u", fixed_local_size); + } } } - hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", fixed_local_size); - return jit_build_options; } diff --git a/src/modules/module_21800.c b/src/modules/module_21800.c index a3f3a00c2..c256d5d5c 100644 --- a/src/modules/module_21800.c +++ b/src/modules/module_21800.c @@ -24,7 +24,8 @@ static const u64 KERN_TYPE = 21800; static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_USES_BITS_64 | OPTI_TYPE_SLOW_HASH_SIMD_LOOP; -static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE; +static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE + | OPTS_TYPE_NATIVE_THREADS; static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; static const char *ST_PASS = "hashcat"; static const char *ST_HASH = 
"$electrum$5*02170fee7c35f1ef3b229edc90fbd0793b688a0d6f41137a97aab2343d315cce16*94cf72d8f5d774932b414a3344984859e43721268d2eb35fa531de5a2fc7024b463c730a54f4f46229dd9fede5034b19ac415c2916e9c16b02094f845795df0c397ff76d597886b1f9e014ad1a8f64a3f617d9900aa645b3ba86f16ce542251fc22c41d93fa6bc118be96d9582917e19d2a299743331804cfc7ce2c035367b4cbcfb70adfb1e10a0f2795769f2165d8fd13daa8b45eeac495b5b63e91a87f63b42e483f84a881e49adecacf6519cb564694b42dd9fe80fcbc6cdb63cf5ae33f35255266f5c2524dd93d3cc15eba0f2ccdc3c109cc2d7e8f711b8b440f168caf8b005e8bcdfe694148e94a04d2a738f09349a96600bd8e8edae793b26ebae231022f24e96cb158db141ac40400a9e9ef099e673cfe017281537c57f82fb45c62bdb64462235a6eefb594961d5eb2c46537958e4d04250804c6e9f343ab7a0db07af6b8a9d1a6c5cfcd311b8fb8383ac9ed9d98d427d526c2f517fc97473bd87cb59899bd0e8fb8c57fa0f7e0d53daa57c972cf92764af4b1725a5fb8f504b663ec519731929b3caaa793d8ee74293eee27d0e208a60e26290bc546e6fa9ed865076e13febfea249729218c1b5752e912055fbf993fbac5df2cca2b37c5e0f9c30789858ceeb3c482a8db123966775aeed2eee2fc34efb160d164929f51589bff748ca773f38978bff3508d5a7591fb2d2795df983504a788071f469d78c88fd7899cabbc5804f458653d0206b82771a59522e1fa794d7de1536c51a437f5d6df5efd6654678e5794ca429b5752e1103340ed80786f1e9da7f5b39af628b2212e4d88cd36b8a7136d50a6b6e275ab406ba7c57cc70d77d01c4c16e9363901164fa92dc9e9b99219d5376f24862e775968605001e71b000e2c7123b4b43f3ca40db17efd729388782e46e64d43ccb947db4eb1473ff1a3836b74fe312cd1a33b73b8b8d80c087088932277773c329f2f66a01d6b3fc1e651c56959ebbed7b14a21b977f3acdedf1a0d98d519a74b50c39b3052d840106da4145345d86ec0461cddafacc2a4f0dd646457ad05bf04dcbcc80516a5c5ed14d2d639a70e77b686f19cbfb63f546d81ae19cc8ba35cce3f3b5b9602df25b678e14411fecec87b8347f5047513df415c6b1a3d39871a6bcb0f67d9cf8311596deae45fd1d84a04fd58f1fd55c5156b7309af09094c99a53674809cb87a45f95a2d69f9997a38085519cb4e056f9efd56672a2c1fe927d5ea8eec25b8aff6e56f9a2310f1a481daf407b8adf16201da267c59973920fd21bb087b88123ef98709839d6a3ee34efb8ccd5c15ed0e46cff3172682769531164b66c8689c35a26299dd26d09233d1f64f96674
74141cf9c6a6de7f2bc52c3bb44cfe679ff4b912c06df406283836b3581773cb76d375304f46239da5996594a8d03b14c02f1b35a432dc44a96331242ae31174*33a7ee59d6d17ed1ee99dc0a71771227e6f3734b17ba36eb589bdced56244135"; diff --git a/src/modules/module_25000.c b/src/modules/module_25000.c new file mode 100644 index 000000000..deac74055 --- /dev/null +++ b/src/modules/module_25000.c @@ -0,0 +1,342 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#include "common.h" +#include "types.h" +#include "modules.h" +#include "bitops.h" +#include "convert.h" +#include "shared.h" +#include "memory.h" +#include "emu_inc_hash_md5.h" + +static const u32 ATTACK_EXEC = ATTACK_EXEC_OUTSIDE_KERNEL; +static const u32 DGST_POS0 = 0; +static const u32 DGST_POS1 = 1; +static const u32 DGST_POS2 = 2; +static const u32 DGST_POS3 = 3; +static const u32 DGST_SIZE = DGST_SIZE_4_4; // 4_3 +static const u32 HASH_CATEGORY = HASH_CATEGORY_NETWORK_PROTOCOL; +static const char *HASH_NAME = "SNMPv3 HMAC-MD5-96/HMAC-SHA1-96"; +static const u64 KERN_TYPE = 25000; +static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE; +static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE; +static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; +static const char *ST_PASS = "hashcat1"; +static const char *ST_HASH = "$SNMPv3$0$45889431$30818f0201033011020409242fc0020300ffe304010102010304383036041180001f88808106d566db57fd600000000002011002020118040a6d61747269785f4d4435040c0000000000000000000000000400303d041180001f88808106d566db57fd60000000000400a226020411f319300201000201003018301606082b06010201010200060a2b06010401bf0803020a$80001f88808106d566db57fd6000000000$1b37c3ea872731f922959e90"; + +u32 module_attack_exec (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC; } +u32 module_dgst_pos0 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const 
user_options_extra_t *user_options_extra) { return DGST_POS0; } +u32 module_dgst_pos1 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS1; } +u32 module_dgst_pos2 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS2; } +u32 module_dgst_pos3 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS3; } +u32 module_dgst_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_SIZE; } +u32 module_hash_category (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_CATEGORY; } +const char *module_hash_name (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_NAME; } +u64 module_kern_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return KERN_TYPE; } +u32 module_opti_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTI_TYPE; } +u64 module_opts_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTS_TYPE; } +u32 module_salt_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, 
MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return SALT_TYPE; } +const char *module_st_hash (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH; } +const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS; } + +static const char *SIGNATURE_SNMPV3 = "$SNMPv3$0$"; + +#define SNMPV3_SALT_MAX 1500 +#define SNMPV3_ENGINEID_MAX 34 +#define SNMPV3_MSG_AUTH_PARAMS_LEN 12 +#define SNMPV3_ROUNDS 1048576 +#define SNMPV3_MAX_PW_LENGTH 64 + +#define SNMPV3_TMP_ELEMS 4096 // 4096 = (256 (max pw length) * 64) / sizeof (u32) +#define SNMPV3_HASH_ELEMS_MD5 4 +#define SNMPV3_HASH_ELEMS_SHA1 8 + +#define SNMPV3_MAX_SALT_ELEMS 512 // 512 * 4 = 2048 > 1500, also has to be multiple of 64 +#define SNMPV3_MAX_ENGINE_ELEMS 16 // 16 * 4 = 64 > 32, also has to be multiple of 64 +#define SNMPV3_MAX_PNUM_ELEMS 4 // 4 * 4 = 16 > 9 + +typedef struct hmac_md5_tmp +{ + u32 tmp_md5[SNMPV3_TMP_ELEMS]; + u32 tmp_sha1[SNMPV3_TMP_ELEMS]; + + u32 h_md5[SNMPV3_HASH_ELEMS_MD5]; + u32 h_sha1[SNMPV3_HASH_ELEMS_SHA1]; + +} hmac_md5_tmp_t; + +typedef struct snmpv3 +{ + u32 salt_buf[SNMPV3_MAX_SALT_ELEMS]; + u32 salt_len; + + u32 engineID_buf[SNMPV3_MAX_ENGINE_ELEMS]; + u32 engineID_len; + + u32 packet_number[SNMPV3_MAX_PNUM_ELEMS]; + +} snmpv3_t; + +u32 module_pw_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u32 pw_min = 8; + + return pw_min; +} + +u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u64 esalt_size = (const u64) sizeof (snmpv3_t); + + return esalt_size; +} + 
+u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u64 tmp_size = (const u64) sizeof (hmac_md5_tmp_t); + + return tmp_size; +} + +u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + // we need to fix iteration count to guarantee the loop count is a multiple of 64 + // 2k calls to md5_transform/sha1_transform typically is enough to overtime pcie bottleneck + + const u32 kernel_loops_min = 2048 * 64; + + return kernel_loops_min; +} + +u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u32 kernel_loops_max = 2048 * 64; + + return kernel_loops_max; +} + +int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len) +{ + u32 *digest = (u32 *) digest_buf; + + snmpv3_t *snmpv3 = (snmpv3_t *) esalt_buf; + + token_t token; + + token.token_cnt = 5; + token.signatures_cnt = 1; + token.signatures_buf[0] = SIGNATURE_SNMPV3; + + token.len[0] = 10; + token.attr[0] = TOKEN_ATTR_FIXED_LENGTH + | TOKEN_ATTR_VERIFY_SIGNATURE; + + // packet number + token.len_min[1] = 1; + token.len_max[1] = 8; + token.sep[1] = '$'; + token.attr[1] = TOKEN_ATTR_VERIFY_LENGTH + | TOKEN_ATTR_VERIFY_DIGIT; + // salt + token.len_min[2] = SNMPV3_MSG_AUTH_PARAMS_LEN * 2; + token.len_max[2] = SNMPV3_SALT_MAX * 2; + token.sep[2] = '$'; + token.attr[2] = TOKEN_ATTR_VERIFY_LENGTH + | TOKEN_ATTR_VERIFY_HEX; + + // engineid + token.len_min[3] = 26; + token.len_max[3] = 
SNMPV3_ENGINEID_MAX; + token.sep[3] = '$'; + token.attr[3] = TOKEN_ATTR_VERIFY_LENGTH + | TOKEN_ATTR_VERIFY_HEX; + + // digest + token.len[4] = SNMPV3_MSG_AUTH_PARAMS_LEN * 2; + token.sep[4] = '$'; + token.attr[4] = TOKEN_ATTR_FIXED_LENGTH + | TOKEN_ATTR_VERIFY_HEX; + + const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token); + + if (rc_tokenizer != PARSER_OK) return (rc_tokenizer); + + // packet number + + const u8 *packet_number_pos = token.buf[1]; + const int packet_number_len = token.len[1]; + + memset (snmpv3->packet_number, 0, sizeof (snmpv3->packet_number)); + + strncpy ((char *) snmpv3->packet_number, (char *) packet_number_pos, packet_number_len); + + // salt + + const u8 *salt_pos = token.buf[2]; + const int salt_len = token.len[2]; + + u8 *salt_ptr = (u8 *) snmpv3->salt_buf; + + snmpv3->salt_len = hex_decode (salt_pos, salt_len, salt_ptr); + + salt->salt_iter = SNMPV3_ROUNDS; + + // handle unique salts detection + + md5_ctx_t md5_ctx; + + md5_init (&md5_ctx); + md5_update (&md5_ctx, snmpv3->salt_buf, snmpv3->salt_len); + md5_final (&md5_ctx); + + // store md5(snmpv3->salt_buf) in salt_buf + + salt->salt_len = 16; + + memcpy (salt->salt_buf, md5_ctx.h, salt->salt_len); + + // engineid + + const u8 *engineID_pos = token.buf[3]; + const int engineID_len = token.len[3]; + + u8 *engineID_ptr = (u8 *) snmpv3->engineID_buf; + + snmpv3->engineID_len = hex_decode (engineID_pos, engineID_len, engineID_ptr); + + // digest + + const u8 *hash_pos = token.buf[4]; + + digest[0] = hex_to_u32 (hash_pos + 0); + digest[1] = hex_to_u32 (hash_pos + 8); + digest[2] = hex_to_u32 (hash_pos + 16); + + // prefer sha1 due to speed + + digest[0] = byte_swap_32 (digest[0]); + digest[1] = byte_swap_32 (digest[1]); + digest[2] = byte_swap_32 (digest[2]); + + digest[3] = 0; + + return (PARSER_OK); +} + +int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const void *digest_buf, MAYBE_UNUSED const salt_t *salt, MAYBE_UNUSED const 
void *esalt_buf, MAYBE_UNUSED const void *hook_salt_buf, MAYBE_UNUSED const hashinfo_t *hash_info, char *line_buf, MAYBE_UNUSED const int line_size) +{ + const u32 *digest = (const u32 *) digest_buf; + + snmpv3_t *snmpv3 = (snmpv3_t *) esalt_buf; + + u8 *out_buf = (u8 *) line_buf; + + int out_len = snprintf (line_buf, line_size, "%s%s$", SIGNATURE_SNMPV3, (char *) snmpv3->packet_number); + + out_len += hex_encode ((u8 *) snmpv3->salt_buf, snmpv3->salt_len, out_buf + out_len); + + out_buf[out_len] = '$'; + + out_len++; + + out_len += hex_encode ((u8 *) snmpv3->engineID_buf, snmpv3->engineID_len, out_buf + out_len); + + out_buf[out_len] = '$'; + + out_len++; + + // prefer sha1 due to speed + + u32 digest_tmp[3]; + + digest_tmp[0] = byte_swap_32 (digest[0]); + digest_tmp[1] = byte_swap_32 (digest[1]); + digest_tmp[2] = byte_swap_32 (digest[2]); + + u32_to_hex (digest_tmp[0], out_buf + out_len); out_len += 8; + u32_to_hex (digest_tmp[1], out_buf + out_len); out_len += 8; + u32_to_hex (digest_tmp[2], out_buf + out_len); out_len += 8; + + out_buf[out_len] = 0; + + return out_len; +} + +void module_init (module_ctx_t *module_ctx) +{ + module_ctx->module_context_size = MODULE_CONTEXT_SIZE_CURRENT; + module_ctx->module_interface_version = MODULE_INTERFACE_VERSION_CURRENT; + + module_ctx->module_attack_exec = module_attack_exec; + module_ctx->module_benchmark_esalt = MODULE_DEFAULT; + module_ctx->module_benchmark_hook_salt = MODULE_DEFAULT; + module_ctx->module_benchmark_mask = MODULE_DEFAULT; + module_ctx->module_benchmark_salt = MODULE_DEFAULT; + module_ctx->module_build_plain_postprocess = MODULE_DEFAULT; + module_ctx->module_deep_comp_kernel = MODULE_DEFAULT; + module_ctx->module_dgst_pos0 = module_dgst_pos0; + module_ctx->module_dgst_pos1 = module_dgst_pos1; + module_ctx->module_dgst_pos2 = module_dgst_pos2; + module_ctx->module_dgst_pos3 = module_dgst_pos3; + module_ctx->module_dgst_size = module_dgst_size; + module_ctx->module_dictstat_disable = MODULE_DEFAULT; + 
module_ctx->module_esalt_size = module_esalt_size; + module_ctx->module_extra_buffer_size = MODULE_DEFAULT; + module_ctx->module_extra_tmp_size = MODULE_DEFAULT; + module_ctx->module_forced_outfile_format = MODULE_DEFAULT; + module_ctx->module_hash_binary_count = MODULE_DEFAULT; + module_ctx->module_hash_binary_parse = MODULE_DEFAULT; + module_ctx->module_hash_binary_save = MODULE_DEFAULT; + module_ctx->module_hash_decode_potfile = MODULE_DEFAULT; + module_ctx->module_hash_decode_zero_hash = MODULE_DEFAULT; + module_ctx->module_hash_decode = module_hash_decode; + module_ctx->module_hash_encode_status = MODULE_DEFAULT; + module_ctx->module_hash_encode_potfile = MODULE_DEFAULT; + module_ctx->module_hash_encode = module_hash_encode; + module_ctx->module_hash_init_selftest = MODULE_DEFAULT; + module_ctx->module_hash_mode = MODULE_DEFAULT; + module_ctx->module_hash_category = module_hash_category; + module_ctx->module_hash_name = module_hash_name; + module_ctx->module_hashes_count_min = MODULE_DEFAULT; + module_ctx->module_hashes_count_max = MODULE_DEFAULT; + module_ctx->module_hlfmt_disable = MODULE_DEFAULT; + module_ctx->module_hook_extra_param_size = MODULE_DEFAULT; + module_ctx->module_hook_extra_param_init = MODULE_DEFAULT; + module_ctx->module_hook_extra_param_term = MODULE_DEFAULT; + module_ctx->module_hook12 = MODULE_DEFAULT; + module_ctx->module_hook23 = MODULE_DEFAULT; + module_ctx->module_hook_salt_size = MODULE_DEFAULT; + module_ctx->module_hook_size = MODULE_DEFAULT; + module_ctx->module_jit_build_options = MODULE_DEFAULT; + module_ctx->module_jit_cache_disable = MODULE_DEFAULT; + module_ctx->module_kernel_accel_max = MODULE_DEFAULT; + module_ctx->module_kernel_accel_min = MODULE_DEFAULT; + module_ctx->module_kernel_loops_max = module_kernel_loops_max; + module_ctx->module_kernel_loops_min = module_kernel_loops_min; + module_ctx->module_kernel_threads_max = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = MODULE_DEFAULT; + 
module_ctx->module_kern_type = module_kern_type; + module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; + module_ctx->module_opti_type = module_opti_type; + module_ctx->module_opts_type = module_opts_type; + module_ctx->module_outfile_check_disable = MODULE_DEFAULT; + module_ctx->module_outfile_check_nocomp = MODULE_DEFAULT; + module_ctx->module_potfile_custom_check = MODULE_DEFAULT; + module_ctx->module_potfile_disable = MODULE_DEFAULT; + module_ctx->module_potfile_keep_all_hashes = MODULE_DEFAULT; + module_ctx->module_pwdump_column = MODULE_DEFAULT; + module_ctx->module_pw_max = MODULE_DEFAULT; + module_ctx->module_pw_min = module_pw_min; + module_ctx->module_salt_max = MODULE_DEFAULT; + module_ctx->module_salt_min = MODULE_DEFAULT; + module_ctx->module_salt_type = module_salt_type; + module_ctx->module_separator = MODULE_DEFAULT; + module_ctx->module_st_hash = module_st_hash; + module_ctx->module_st_pass = module_st_pass; + module_ctx->module_tmp_size = module_tmp_size; + module_ctx->module_unstable_warning = MODULE_DEFAULT; + module_ctx->module_warmup_disable = MODULE_DEFAULT; +} diff --git a/src/modules/module_25100.c b/src/modules/module_25100.c index 22155ce9a..4fdbdf828 100644 --- a/src/modules/module_25100.c +++ b/src/modules/module_25100.c @@ -24,8 +24,8 @@ static const u64 KERN_TYPE = 25100; static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE; static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE; static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; -static const char *ST_PASS = "hashcat"; -static const char *ST_HASH = "$SNMPv3$1$76$3081b10201033011020430f6f3d5020300ffe304010702010304373035040d80001f888059dc486145a2632202010802020ab90405706970706f040c00000000000000000000000004080000000103d5321a0460826ecf6443956d4c364bfc6f6ffc8ee0df000ffd0955af12d2c0f3c60fadea417d2bb80c0b2c1fa7a46ce44f9f16e15ee830a49881f60ecfa757d2f04000eb39a94058121d88ca20eeef4e6bf06784c67c15f144915d9bc2c6a0461da92a4abe$80001f888059dc486145a26322$c51ba677ad96869c1cb32196"; +static const char 
*ST_PASS = "hashcat1"; +static const char *ST_HASH = "$SNMPv3$1$45889431$30818f0201033011020409242fc0020300ffe304010102010304383036041180001f88808106d566db57fd600000000002011002020118040a6d61747269785f4d4435040c0000000000000000000000000400303d041180001f88808106d566db57fd60000000000400a226020411f319300201000201003018301606082b06010201010200060a2b06010401bf0803020a$80001f88808106d566db57fd6000000000$1b37c3ea872731f922959e90"; u32 module_attack_exec (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC; } u32 module_dgst_pos0 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0; } @@ -45,8 +45,8 @@ const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, static const char *SIGNATURE_SNMPV3 = "$SNMPv3$1$"; #define SNMPV3_SALT_MAX 1500 -#define SNMPV3_ENGINEID_MAX 32 -#define SNMPV3_MSG_AUTH_PARAMS_MAX 12 +#define SNMPV3_ENGINEID_MAX 34 +#define SNMPV3_MSG_AUTH_PARAMS_LEN 12 #define SNMPV3_ROUNDS 1048576 #define SNMPV3_MAX_PW_LENGTH 64 @@ -76,6 +76,13 @@ typedef struct snmpv3 } snmpv3_t; +u32 module_pw_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u32 pw_min = 8; + + return pw_min; +} + u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { const u64 esalt_size = (const u64) sizeof (snmpv3_t); @@ -130,23 +137,23 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE token.attr[1] = TOKEN_ATTR_VERIFY_LENGTH | TOKEN_ATTR_VERIFY_DIGIT; // salt - token.len_min[2] = 12 * 2; + token.len_min[2] = SNMPV3_MSG_AUTH_PARAMS_LEN * 2; token.len_max[2] = 
SNMPV3_SALT_MAX * 2; token.sep[2] = '$'; token.attr[2] = TOKEN_ATTR_VERIFY_LENGTH | TOKEN_ATTR_VERIFY_HEX; // engineid - token.len_min[3] = 5; + token.len_min[3] = 26; token.len_max[3] = SNMPV3_ENGINEID_MAX; token.sep[3] = '$'; - token.attr[3] = TOKEN_ATTR_VERIFY_LENGTH; + token.attr[3] = TOKEN_ATTR_VERIFY_LENGTH + | TOKEN_ATTR_VERIFY_HEX; // digest - token.len_min[4] = SNMPV3_MSG_AUTH_PARAMS_MAX * 2; - token.len_max[4] = SNMPV3_MSG_AUTH_PARAMS_MAX * 2; + token.len[4] = SNMPV3_MSG_AUTH_PARAMS_LEN * 2; token.sep[4] = '$'; - token.attr[4] = TOKEN_ATTR_VERIFY_LENGTH + token.attr[4] = TOKEN_ATTR_FIXED_LENGTH | TOKEN_ATTR_VERIFY_HEX; const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token); @@ -303,7 +310,7 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_potfile_keep_all_hashes = MODULE_DEFAULT; module_ctx->module_pwdump_column = MODULE_DEFAULT; module_ctx->module_pw_max = MODULE_DEFAULT; - module_ctx->module_pw_min = MODULE_DEFAULT; + module_ctx->module_pw_min = module_pw_min; module_ctx->module_salt_max = MODULE_DEFAULT; module_ctx->module_salt_min = MODULE_DEFAULT; module_ctx->module_salt_type = module_salt_type; diff --git a/src/modules/module_25200.c b/src/modules/module_25200.c index c98a347ef..66573cb5a 100644 --- a/src/modules/module_25200.c +++ b/src/modules/module_25200.c @@ -24,8 +24,8 @@ static const u64 KERN_TYPE = 25200; static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE; static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE; static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; -static const char *ST_PASS = "hashcat"; -static const char *ST_HASH = 
"$SNMPv3$2$66763052$13981919518623358902340156831753173612320956749283824166083320737667668557830898783481876963136410266762758410322896320705075044221495960812100760230106803899899467077793703068392752686845035561487927252457444567685389901239388468830507087105054207914325254376053788152029716918450770264047103676562621965276752797029332926039166807829108367446173251908238116020942421323633620301312478670302264165059728208402342845743839533979473825394866704960428648622730299023225638967097578710279784722583947877561544154219162080289188160001741612377820114739093961409809862173307722539556954826052612794054060797358016549602977742745078911393042420821004243620362464971828700104979572910001640083882586179153483503492341163054930853321963503411228241996417991605003371264529827508426941919673592574025732354318435733211018917539824570724324796232199960952117561108106623865308577977944499366806697863259301760429786001824121720055893438673268643594146796410437039466462606490272723136671298529920486664067752007564122205089571790718437001200506203464426405927405102300269665189637001279369690218157456566218400534722049383049029139069701182053729830585217732347396312967325628046845068493719801191260136945971516486442056102815519090214442808707545803919529217103430588641187558031052830941742920355893755319896626873275796534820394248837050567688575113833311009595128372820474678989203565094681918285106102363272728922586582037066265522397748326630668375500179630717875844561081542915676557961288028298248995547031274515608973804660067065502484039882958958452781062725550260382637592283691962996228392332833626159043179186189904614052189303508782635840692436969244901198720814518$79f7b1$57e964c7cb117647004cf132"; +static const char *ST_PASS = "hashcat1"; +static const char *ST_HASH = 
"$SNMPv3$2$45889431$30818f02010330110204371780f3020300ffe304010102010304383036041180001f88808106d566db57fd600000000002011002020118040a6d61747269785f534841040c0000000000000000000000000400303d041180001f88808106d566db57fd60000000000400a2260204073557d50201000201003018301606082b06010201010200060a2b06010401bf0803020a$80001f88808106d566db57fd6000000000$81f14f1930589f26f6755f6b"; u32 module_attack_exec (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC; } u32 module_dgst_pos0 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0; } @@ -45,8 +45,8 @@ const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, static const char *SIGNATURE_SNMPV3 = "$SNMPv3$2$"; #define SNMPV3_SALT_MAX 1500 -#define SNMPV3_ENGINEID_MAX 32 -#define SNMPV3_MSG_AUTH_PARAMS_MAX 12 +#define SNMPV3_ENGINEID_MAX 34 +#define SNMPV3_MSG_AUTH_PARAMS_LEN 12 #define SNMPV3_ROUNDS 1048576 #define SNMPV3_MAX_PW_LENGTH 64 @@ -76,6 +76,13 @@ typedef struct snmpv3 } snmpv3_t; +u32 module_pw_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u32 pw_min = 8; + + return pw_min; +} + u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { const u64 esalt_size = (const u64) sizeof (snmpv3_t); @@ -130,23 +137,23 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE token.attr[1] = TOKEN_ATTR_VERIFY_LENGTH | TOKEN_ATTR_VERIFY_DIGIT; // salt - token.len_min[2] = 12 * 2; + token.len_min[2] = SNMPV3_MSG_AUTH_PARAMS_LEN * 2; token.len_max[2] = SNMPV3_SALT_MAX * 2; token.sep[2] = '$'; 
token.attr[2] = TOKEN_ATTR_VERIFY_LENGTH | TOKEN_ATTR_VERIFY_HEX; // engineid - token.len_min[3] = 5; + token.len_min[3] = 26; token.len_max[3] = SNMPV3_ENGINEID_MAX; token.sep[3] = '$'; - token.attr[3] = TOKEN_ATTR_VERIFY_LENGTH; + token.attr[3] = TOKEN_ATTR_VERIFY_LENGTH + | TOKEN_ATTR_VERIFY_HEX; // digest - token.len_min[4] = SNMPV3_MSG_AUTH_PARAMS_MAX * 2; - token.len_max[4] = SNMPV3_MSG_AUTH_PARAMS_MAX * 2; + token.len[4] = SNMPV3_MSG_AUTH_PARAMS_LEN * 2; token.sep[4] = '$'; - token.attr[4] = TOKEN_ATTR_VERIFY_LENGTH + token.attr[4] = TOKEN_ATTR_FIXED_LENGTH | TOKEN_ATTR_VERIFY_HEX; const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token); @@ -183,10 +190,10 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE // store sha1(snmpv3->salt_buf) in salt_buf - memcpy (salt->salt_buf, sha1_ctx.h, 20); - salt->salt_len = 20; + memcpy (salt->salt_buf, sha1_ctx.h, salt->salt_len); + // engineid const u8 *engineID_pos = token.buf[3]; @@ -203,12 +210,13 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE digest[0] = hex_to_u32 (hash_pos + 0); digest[1] = hex_to_u32 (hash_pos + 8); digest[2] = hex_to_u32 (hash_pos + 16); - digest[3] = 0; digest[0] = byte_swap_32 (digest[0]); digest[1] = byte_swap_32 (digest[1]); digest[2] = byte_swap_32 (digest[2]); + digest[3] = 0; + return (PARSER_OK); } @@ -313,7 +321,7 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_potfile_keep_all_hashes = MODULE_DEFAULT; module_ctx->module_pwdump_column = MODULE_DEFAULT; module_ctx->module_pw_max = MODULE_DEFAULT; - module_ctx->module_pw_min = MODULE_DEFAULT; + module_ctx->module_pw_min = module_pw_min; module_ctx->module_salt_max = MODULE_DEFAULT; module_ctx->module_salt_min = MODULE_DEFAULT; module_ctx->module_salt_type = module_salt_type; diff --git a/src/modules/module_25400.c b/src/modules/module_25400.c index 341837786..6325bf737 100644 --- a/src/modules/module_25400.c +++ 
b/src/modules/module_25400.c @@ -103,35 +103,13 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY } else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) { - if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK) + if (device_param->device_local_mem_size < 49152) { - native_threads = 8; - } - else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD) - { - if (device_param->device_local_mem_size < 49152) - { - native_threads = 32; - } - else - { - native_threads = 64; - } - } - else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) - { - if (device_param->device_local_mem_size < 49152) - { - native_threads = 32; - } - else - { - native_threads = 64; - } + native_threads = MIN (device_param->kernel_preferred_wgs_multiple, 32); // We can't just set 32, because Intel GPU need 8 } else { - native_threads = 32; + native_threads = device_param->kernel_preferred_wgs_multiple; } } diff --git a/src/mpsp.c b/src/mpsp.c index 67168e189..d096b97b5 100644 --- a/src/mpsp.c +++ b/src/mpsp.c @@ -16,7 +16,7 @@ #include "ext_lzma.h" #include "mpsp.h" -static const char *DEF_MASK = "?1?2?2?2?2?2?2?3?3?3?3?d?d?d?d"; +static const char *const DEF_MASK = "?1?2?2?2?2?2?2?3?3?3?3?d?d?d?d"; #define MAX_MFS 5 // 4*charset, 1*mask diff --git a/src/shared.c b/src/shared.c index b101cc01f..7efdd4d53 100644 --- a/src/shared.c +++ b/src/shared.c @@ -15,97 +15,97 @@ #include #endif -static const char *PA_000 = "OK"; -static const char *PA_001 = "Ignored due to comment"; -static const char *PA_002 = "Ignored due to zero length"; -static const char *PA_003 = "Line-length exception"; -static const char *PA_004 = "Hash-length exception"; -static const char *PA_005 = "Hash-value exception"; -static const char *PA_006 = "Salt-length exception"; -static const char *PA_007 = "Salt-value exception"; -static const char *PA_008 = "Salt-iteration count exception"; -static const char *PA_009 = "Separator unmatched"; -static const 
char *PA_010 = "Signature unmatched"; -static const char *PA_011 = "Invalid hccapx file size"; -static const char *PA_012 = "Invalid hccapx eapol size"; -static const char *PA_013 = "Invalid psafe2 filesize"; -static const char *PA_014 = "Invalid psafe3 filesize"; -static const char *PA_015 = "Invalid truecrypt filesize"; -static const char *PA_016 = "Invalid veracrypt filesize"; -static const char *PA_017 = "Invalid SIP directive, only MD5 is supported"; -static const char *PA_018 = "Hash-file exception"; -static const char *PA_019 = "Hash-encoding exception"; -static const char *PA_020 = "Salt-encoding exception"; -static const char *PA_021 = "Invalid LUKS filesize"; -static const char *PA_022 = "Invalid LUKS identifier"; -static const char *PA_023 = "Invalid LUKS version"; -static const char *PA_024 = "Invalid or unsupported LUKS cipher type"; -static const char *PA_025 = "Invalid or unsupported LUKS cipher mode"; -static const char *PA_026 = "Invalid or unsupported LUKS hash type"; -static const char *PA_027 = "Invalid LUKS key size"; -static const char *PA_028 = "Disabled LUKS key detected"; -static const char *PA_029 = "Invalid LUKS key AF stripes count"; -static const char *PA_030 = "Invalid combination of LUKS hash type and cipher type"; -static const char *PA_031 = "Invalid hccapx signature"; -static const char *PA_032 = "Invalid hccapx version"; -static const char *PA_033 = "Invalid hccapx message pair"; -static const char *PA_034 = "Token encoding exception"; -static const char *PA_035 = "Token length exception"; -static const char *PA_036 = "Insufficient entropy exception"; -static const char *PA_037 = "Hash contains unsupported compression type for current mode"; -static const char *PA_038 = "Invalid key size"; -static const char *PA_039 = "Invalid block size"; -static const char *PA_040 = "Invalid or unsupported cipher"; -static const char *PA_041 = "Invalid filesize"; -static const char *PA_042 = "IV length exception"; -static const char *PA_043 = 
"CT length exception"; -static const char *PA_255 = "Unknown error"; +static const char *const PA_000 = "OK"; +static const char *const PA_001 = "Ignored due to comment"; +static const char *const PA_002 = "Ignored due to zero length"; +static const char *const PA_003 = "Line-length exception"; +static const char *const PA_004 = "Hash-length exception"; +static const char *const PA_005 = "Hash-value exception"; +static const char *const PA_006 = "Salt-length exception"; +static const char *const PA_007 = "Salt-value exception"; +static const char *const PA_008 = "Salt-iteration count exception"; +static const char *const PA_009 = "Separator unmatched"; +static const char *const PA_010 = "Signature unmatched"; +static const char *const PA_011 = "Invalid hccapx file size"; +static const char *const PA_012 = "Invalid hccapx eapol size"; +static const char *const PA_013 = "Invalid psafe2 filesize"; +static const char *const PA_014 = "Invalid psafe3 filesize"; +static const char *const PA_015 = "Invalid truecrypt filesize"; +static const char *const PA_016 = "Invalid veracrypt filesize"; +static const char *const PA_017 = "Invalid SIP directive, only MD5 is supported"; +static const char *const PA_018 = "Hash-file exception"; +static const char *const PA_019 = "Hash-encoding exception"; +static const char *const PA_020 = "Salt-encoding exception"; +static const char *const PA_021 = "Invalid LUKS filesize"; +static const char *const PA_022 = "Invalid LUKS identifier"; +static const char *const PA_023 = "Invalid LUKS version"; +static const char *const PA_024 = "Invalid or unsupported LUKS cipher type"; +static const char *const PA_025 = "Invalid or unsupported LUKS cipher mode"; +static const char *const PA_026 = "Invalid or unsupported LUKS hash type"; +static const char *const PA_027 = "Invalid LUKS key size"; +static const char *const PA_028 = "Disabled LUKS key detected"; +static const char *const PA_029 = "Invalid LUKS key AF stripes count"; +static const char 
*const PA_030 = "Invalid combination of LUKS hash type and cipher type"; +static const char *const PA_031 = "Invalid hccapx signature"; +static const char *const PA_032 = "Invalid hccapx version"; +static const char *const PA_033 = "Invalid hccapx message pair"; +static const char *const PA_034 = "Token encoding exception"; +static const char *const PA_035 = "Token length exception"; +static const char *const PA_036 = "Insufficient entropy exception"; +static const char *const PA_037 = "Hash contains unsupported compression type for current mode"; +static const char *const PA_038 = "Invalid key size"; +static const char *const PA_039 = "Invalid block size"; +static const char *const PA_040 = "Invalid or unsupported cipher"; +static const char *const PA_041 = "Invalid filesize"; +static const char *const PA_042 = "IV length exception"; +static const char *const PA_043 = "CT length exception"; +static const char *const PA_255 = "Unknown error"; -static const char *OPTI_STR_OPTIMIZED_KERNEL = "Optimized-Kernel"; -static const char *OPTI_STR_ZERO_BYTE = "Zero-Byte"; -static const char *OPTI_STR_PRECOMPUTE_INIT = "Precompute-Init"; -static const char *OPTI_STR_MEET_IN_MIDDLE = "Meet-In-The-Middle"; -static const char *OPTI_STR_EARLY_SKIP = "Early-Skip"; -static const char *OPTI_STR_NOT_SALTED = "Not-Salted"; -static const char *OPTI_STR_NOT_ITERATED = "Not-Iterated"; -static const char *OPTI_STR_PREPENDED_SALT = "Prepended-Salt"; -static const char *OPTI_STR_APPENDED_SALT = "Appended-Salt"; -static const char *OPTI_STR_SINGLE_HASH = "Single-Hash"; -static const char *OPTI_STR_SINGLE_SALT = "Single-Salt"; -static const char *OPTI_STR_BRUTE_FORCE = "Brute-Force"; -static const char *OPTI_STR_RAW_HASH = "Raw-Hash"; -static const char *OPTI_STR_SLOW_HASH_SIMD_INIT = "Slow-Hash-SIMD-INIT"; -static const char *OPTI_STR_SLOW_HASH_SIMD_LOOP = "Slow-Hash-SIMD-LOOP"; -static const char *OPTI_STR_SLOW_HASH_SIMD_COMP = "Slow-Hash-SIMD-COMP"; -static const char *OPTI_STR_USES_BITS_8 
= "Uses-8-Bit"; -static const char *OPTI_STR_USES_BITS_16 = "Uses-16-Bit"; -static const char *OPTI_STR_USES_BITS_32 = "Uses-32-Bit"; -static const char *OPTI_STR_USES_BITS_64 = "Uses-64-Bit"; +static const char *const OPTI_STR_OPTIMIZED_KERNEL = "Optimized-Kernel"; +static const char *const OPTI_STR_ZERO_BYTE = "Zero-Byte"; +static const char *const OPTI_STR_PRECOMPUTE_INIT = "Precompute-Init"; +static const char *const OPTI_STR_MEET_IN_MIDDLE = "Meet-In-The-Middle"; +static const char *const OPTI_STR_EARLY_SKIP = "Early-Skip"; +static const char *const OPTI_STR_NOT_SALTED = "Not-Salted"; +static const char *const OPTI_STR_NOT_ITERATED = "Not-Iterated"; +static const char *const OPTI_STR_PREPENDED_SALT = "Prepended-Salt"; +static const char *const OPTI_STR_APPENDED_SALT = "Appended-Salt"; +static const char *const OPTI_STR_SINGLE_HASH = "Single-Hash"; +static const char *const OPTI_STR_SINGLE_SALT = "Single-Salt"; +static const char *const OPTI_STR_BRUTE_FORCE = "Brute-Force"; +static const char *const OPTI_STR_RAW_HASH = "Raw-Hash"; +static const char *const OPTI_STR_SLOW_HASH_SIMD_INIT = "Slow-Hash-SIMD-INIT"; +static const char *const OPTI_STR_SLOW_HASH_SIMD_LOOP = "Slow-Hash-SIMD-LOOP"; +static const char *const OPTI_STR_SLOW_HASH_SIMD_COMP = "Slow-Hash-SIMD-COMP"; +static const char *const OPTI_STR_USES_BITS_8 = "Uses-8-Bit"; +static const char *const OPTI_STR_USES_BITS_16 = "Uses-16-Bit"; +static const char *const OPTI_STR_USES_BITS_32 = "Uses-32-Bit"; +static const char *const OPTI_STR_USES_BITS_64 = "Uses-64-Bit"; -static const char *HASH_CATEGORY_UNDEFINED_STR = "Undefined"; -static const char *HASH_CATEGORY_RAW_HASH_STR = "Raw Hash"; -static const char *HASH_CATEGORY_RAW_HASH_SALTED_STR = "Raw Hash, Salted and/or Iterated"; -static const char *HASH_CATEGORY_RAW_HASH_AUTHENTICATED_STR = "Raw Hash, Authenticated"; -static const char *HASH_CATEGORY_RAW_CIPHER_KPA_STR = "Raw Cipher, Known-Plaintext attack"; -static const char *HASH_CATEGORY_GENERIC_KDF_STR = 
"Generic KDF"; -static const char *HASH_CATEGORY_NETWORK_PROTOCOL_STR = "Network Protocols"; -static const char *HASH_CATEGORY_FORUM_SOFTWARE_STR = "Forums, CMS, E-Commerce"; -static const char *HASH_CATEGORY_DATABASE_SERVER_STR = "Database Server"; -static const char *HASH_CATEGORY_NETWORK_SERVER_STR = "FTP, HTTP, SMTP, LDAP Server"; -static const char *HASH_CATEGORY_RAW_CHECKSUM_STR = "Raw Checksum"; -static const char *HASH_CATEGORY_OS_STR = "Operating System"; -static const char *HASH_CATEGORY_EAS_STR = "Enterprise Application Software (EAS)"; -static const char *HASH_CATEGORY_ARCHIVE_STR = "Archives"; -static const char *HASH_CATEGORY_FDE_STR = "Full-Disk Encryption (FDE)"; -static const char *HASH_CATEGORY_FBE_STR = "File-Based Encryption (FBE)"; -static const char *HASH_CATEGORY_DOCUMENTS_STR = "Documents"; -static const char *HASH_CATEGORY_PASSWORD_MANAGER_STR = "Password Managers"; -static const char *HASH_CATEGORY_OTP_STR = "One-Time Passwords"; -static const char *HASH_CATEGORY_PLAIN_STR = "Plaintext"; -static const char *HASH_CATEGORY_FRAMEWORK_STR = "Framework"; -static const char *HASH_CATEGORY_PRIVATE_KEY_STR = "Private Key"; -static const char *HASH_CATEGORY_IMS_STR = "Instant Messaging Service"; -static const char *HASH_CATEGORY_CRYPTOCURRENCY_WALLET_STR = "Cryptocurrency Wallet"; +static const char *const HASH_CATEGORY_UNDEFINED_STR = "Undefined"; +static const char *const HASH_CATEGORY_RAW_HASH_STR = "Raw Hash"; +static const char *const HASH_CATEGORY_RAW_HASH_SALTED_STR = "Raw Hash, Salted and/or Iterated"; +static const char *const HASH_CATEGORY_RAW_HASH_AUTHENTICATED_STR = "Raw Hash, Authenticated"; +static const char *const HASH_CATEGORY_RAW_CIPHER_KPA_STR = "Raw Cipher, Known-Plaintext attack"; +static const char *const HASH_CATEGORY_GENERIC_KDF_STR = "Generic KDF"; +static const char *const HASH_CATEGORY_NETWORK_PROTOCOL_STR = "Network Protocols"; +static const char *const HASH_CATEGORY_FORUM_SOFTWARE_STR = "Forums, CMS, E-Commerce"; 
+static const char *const HASH_CATEGORY_DATABASE_SERVER_STR = "Database Server"; +static const char *const HASH_CATEGORY_NETWORK_SERVER_STR = "FTP, HTTP, SMTP, LDAP Server"; +static const char *const HASH_CATEGORY_RAW_CHECKSUM_STR = "Raw Checksum"; +static const char *const HASH_CATEGORY_OS_STR = "Operating System"; +static const char *const HASH_CATEGORY_EAS_STR = "Enterprise Application Software (EAS)"; +static const char *const HASH_CATEGORY_ARCHIVE_STR = "Archives"; +static const char *const HASH_CATEGORY_FDE_STR = "Full-Disk Encryption (FDE)"; +static const char *const HASH_CATEGORY_FBE_STR = "File-Based Encryption (FBE)"; +static const char *const HASH_CATEGORY_DOCUMENTS_STR = "Documents"; +static const char *const HASH_CATEGORY_PASSWORD_MANAGER_STR = "Password Managers"; +static const char *const HASH_CATEGORY_OTP_STR = "One-Time Passwords"; +static const char *const HASH_CATEGORY_PLAIN_STR = "Plaintext"; +static const char *const HASH_CATEGORY_FRAMEWORK_STR = "Framework"; +static const char *const HASH_CATEGORY_PRIVATE_KEY_STR = "Private Key"; +static const char *const HASH_CATEGORY_IMS_STR = "Instant Messaging Service"; +static const char *const HASH_CATEGORY_CRYPTOCURRENCY_WALLET_STR = "Cryptocurrency Wallet"; int sort_by_string_sized (const void *p1, const void *p2) { @@ -1062,7 +1062,7 @@ static int rounds_count_length (const char *input_buf, const int input_len) { if (input_len >= 9) // 9 is minimum because of "rounds=X$" { - static const char *rounds = "rounds="; + static const char *const rounds = "rounds="; if (memcmp (input_buf, rounds, 7) == 0) { diff --git a/src/status.c b/src/status.c index 6b4084b59..c26e43e51 100644 --- a/src/status.c +++ b/src/status.c @@ -18,29 +18,29 @@ #include "shared.h" #include "status.h" -static const char *ST_0000 = "Initializing"; -static const char *ST_0001 = "Autotuning"; -static const char *ST_0002 = "Selftest"; -static const char *ST_0003 = "Running"; -static const char *ST_0004 = "Paused"; -static const char 
*ST_0005 = "Exhausted"; -static const char *ST_0006 = "Cracked"; -static const char *ST_0007 = "Aborted"; -static const char *ST_0008 = "Quit"; -static const char *ST_0009 = "Bypass"; -static const char *ST_0010 = "Aborted (Checkpoint)"; -static const char *ST_0011 = "Aborted (Runtime)"; -static const char *ST_0012 = "Running (Checkpoint Quit requested)"; -static const char *ST_0013 = "Error"; -static const char *ST_0014 = "Aborted (Finish)"; -static const char *ST_0015 = "Running (Quit after attack requested)"; -static const char *ST_0016 = "Autodetect"; -static const char *ST_9999 = "Unknown! Bug!"; +static const char *const ST_0000 = "Initializing"; +static const char *const ST_0001 = "Autotuning"; +static const char *const ST_0002 = "Selftest"; +static const char *const ST_0003 = "Running"; +static const char *const ST_0004 = "Paused"; +static const char *const ST_0005 = "Exhausted"; +static const char *const ST_0006 = "Cracked"; +static const char *const ST_0007 = "Aborted"; +static const char *const ST_0008 = "Quit"; +static const char *const ST_0009 = "Bypass"; +static const char *const ST_0010 = "Aborted (Checkpoint)"; +static const char *const ST_0011 = "Aborted (Runtime)"; +static const char *const ST_0012 = "Running (Checkpoint Quit requested)"; +static const char *const ST_0013 = "Error"; +static const char *const ST_0014 = "Aborted (Finish)"; +static const char *const ST_0015 = "Running (Quit after attack requested)"; +static const char *const ST_0016 = "Autodetect"; +static const char *const ST_9999 = "Unknown! 
Bug!"; static const char UNITS[7] = { ' ', 'k', 'M', 'G', 'T', 'P', 'E' }; -static const char *ETA_ABSOLUTE_MAX_EXCEEDED = "Next Big Bang"; // in honor of ighashgpu -static const char *ETA_RELATIVE_MAX_EXCEEDED = "> 10 years"; +static const char *const ETA_ABSOLUTE_MAX_EXCEEDED = "Next Big Bang"; // in honor of ighashgpu +static const char *const ETA_RELATIVE_MAX_EXCEEDED = "> 10 years"; static char *status_get_rules_file (const hashcat_ctx_t *hashcat_ctx) { diff --git a/src/terminal.c b/src/terminal.c index 167b8b821..03173c690 100644 --- a/src/terminal.c +++ b/src/terminal.c @@ -19,8 +19,8 @@ static const size_t TERMINAL_LINE_LENGTH = 79; -static const char *PROMPT_ACTIVE = "[s]tatus [p]ause [b]ypass [c]heckpoint [f]inish [q]uit => "; -static const char *PROMPT_PAUSED = "[s]tatus [r]esume [b]ypass [c]heckpoint [f]inish [q]uit => "; +static const char *const PROMPT_ACTIVE = "[s]tatus [p]ause [b]ypass [c]heckpoint [f]inish [q]uit => "; +static const char *const PROMPT_PAUSED = "[s]tatus [r]esume [b]ypass [c]heckpoint [f]inish [q]uit => "; void welcome_screen (hashcat_ctx_t *hashcat_ctx, const char *version_tag) { @@ -640,7 +640,7 @@ void hash_info_single (hashcat_ctx_t *hashcat_ctx, user_options_extra_t *user_op if (hashconfig->is_salted == true) { u32 t = hashconfig->salt_type; - char *t_desc = (t == SALT_TYPE_EMBEDDED) ? "Embedded\0" : (t == SALT_TYPE_GENERIC) ? "Generic\0" : "Virtual\0"; + const char *t_desc = (t == SALT_TYPE_EMBEDDED) ? "Embedded\0" : (t == SALT_TYPE_GENERIC) ? 
"Generic\0" : "Virtual\0"; event_log_info (hashcat_ctx, " Salt.Type...........: %s", t_desc); event_log_info (hashcat_ctx, " Salt.Len.Min........: %d", hashconfig->salt_min); event_log_info (hashcat_ctx, " Salt.Len.Max........: %d", hashconfig->salt_max); @@ -818,9 +818,9 @@ void backend_info (hashcat_ctx_t *hashcat_ctx) event_log_info (hashcat_ctx, NULL); int hip_devices_cnt = backend_ctx->hip_devices_cnt; - int hip_driver_version = backend_ctx->hip_driver_version; + int hip_driverVersion = backend_ctx->hip_driverVersion; - event_log_info (hashcat_ctx, "HIP.Version.: %d.%d", hip_driver_version / 1000, (hip_driver_version % 100) / 10); + event_log_info (hashcat_ctx, "HIP.Version.: %d.%d", hip_driverVersion / 100, hip_driverVersion % 10); event_log_info (hashcat_ctx, NULL); for (int hip_devices_idx = 0; hip_devices_idx < hip_devices_cnt; hip_devices_idx++) @@ -1014,10 +1014,10 @@ void backend_info_compact (hashcat_ctx_t *hashcat_ctx) if (backend_ctx->hip) { - int hip_devices_cnt = backend_ctx->hip_devices_cnt; - int hip_driver_version = backend_ctx->hip_driver_version; + int hip_devices_cnt = backend_ctx->hip_devices_cnt; + int hip_driverVersion = backend_ctx->hip_driverVersion; - const size_t len = event_log_info (hashcat_ctx, "HIP API (HIP %d.%d)", hip_driver_version / 1000, (hip_driver_version % 100) / 10); + const size_t len = event_log_info (hashcat_ctx, "HIP API (HIP %d.%d)", hip_driverVersion / 100, hip_driverVersion % 10); char line[HCBUFSIZ_TINY] = { 0 }; diff --git a/src/user_options.c b/src/user_options.c index 14a75c8bd..77d989376 100644 --- a/src/user_options.c +++ b/src/user_options.c @@ -20,12 +20,12 @@ #endif #ifdef WITH_BRAIN -static const char *short_options = "hVvm:a:r:j:k:g:o:t:d:D:n:u:T:c:p:s:l:1:2:3:4:iIbw:OMSz"; +static const char *const short_options = "hVvm:a:r:j:k:g:o:t:d:D:n:u:T:c:p:s:l:1:2:3:4:iIbw:OMSz"; #else -static const char *short_options = "hVvm:a:r:j:k:g:o:t:d:D:n:u:T:c:p:s:l:1:2:3:4:iIbw:OMS"; +static const char *const 
short_options = "hVvm:a:r:j:k:g:o:t:d:D:n:u:T:c:p:s:l:1:2:3:4:iIbw:OMS"; #endif -static char *SEPARATOR = ":"; +static char *const SEPARATOR = ":"; static const struct option long_options[] = { @@ -147,15 +147,15 @@ static const struct option long_options[] = {NULL, 0, NULL, 0 } }; -static const char *ENCODING_FROM = "utf-8"; -static const char *ENCODING_TO = "utf-8"; +static const char *const ENCODING_FROM = "utf-8"; +static const char *const ENCODING_TO = "utf-8"; -static const char *RULE_BUF_R = ":"; -static const char *RULE_BUF_L = ":"; +static const char *const RULE_BUF_R = ":"; +static const char *const RULE_BUF_L = ":"; -static const char *DEF_MASK_CS_1 = "?l?d?u"; -static const char *DEF_MASK_CS_2 = "?l?d"; -static const char *DEF_MASK_CS_3 = "?l?d*!$@_"; +static const char *const DEF_MASK_CS_1 = "?l?d?u"; +static const char *const DEF_MASK_CS_2 = "?l?d"; +static const char *const DEF_MASK_CS_3 = "?l?d*!$@_"; int user_options_init (hashcat_ctx_t *hashcat_ctx) { diff --git a/tools/benchmark_deep.pl b/tools/benchmark_deep.pl index df6777441..ba2db8856 100755 --- a/tools/benchmark_deep.pl +++ b/tools/benchmark_deep.pl @@ -230,16 +230,19 @@ my @hash_types = 13751, 13761, 13771, + 13781, 13800, 13900, 14000, 14100, 14400, + 14500, 14700, 14800, 14900, 15000, 15100, + 15200, 15300, 15400, 15500, @@ -250,10 +253,13 @@ my @hash_types = 16200, 16300, 16400, + 16500, 16600, + 16700, 16800, 16801, 16900, + 17210, 17300, 17400, 17500, @@ -333,12 +339,23 @@ my @hash_types = 24700, 24800, 24900, + 25000, + 25100, + 25200, 25300, 25400, 25500, + 25700, 25900, 26000, 26100, + 26200, + 26300, + 26401, + 26402, + 26403, + 26500, + 26600, ); if (scalar @ARGV) diff --git a/tools/cryptoloop2hashcat.py b/tools/cryptoloop2hashcat.py old mode 100644 new mode 100755 diff --git a/tools/test_modules/m25000.pm b/tools/test_modules/m25000.pm new file mode 100644 index 000000000..71cc1c512 --- /dev/null +++ b/tools/test_modules/m25000.pm @@ -0,0 +1,116 @@ +#!/usr/bin/env perl + +## +## 
Author......: See docs/credits.txt +## License.....: MIT +## + +use strict; +use warnings; + +use Digest::MD5 qw (md5 md5_hex); +use Digest::SHA qw (sha1 sha1_hex); +use Digest::HMAC qw (hmac hmac_hex); + +sub module_constraints { [[8, 256], [24, 3000], [-1, -1], [-1, -1], [-1, -1]] } + +sub module_generate_hash +{ + my $word = shift; + my $salt = shift; + my $pkt_num = shift // int(rand(100000000)); + my $engineID = shift // random_hex_string(26, 34); + my $mode = shift // int(rand(1)) + 1; + + # make even if needed + + if (length($salt) %2 == 1) + { + $salt = $salt . "8"; + } + + my $string1 = $word x 1048576; + + $string1 = substr ($string1, 0, 1048576); + + my $digest1 = ''; + + if ($mode eq 2) + { + $digest1 = sha1_hex ($string1); + } + elsif ($mode eq 1) + { + $digest1 = md5_hex ($string1); + } + + my $buf = join '', $digest1, $engineID, $digest1; + + my $digest = ''; + + if ($mode eq 2) + { + my $digest2 = sha1(pack("H*", $buf)); + + $digest = hmac_hex (pack("H*", $salt), $digest2, \&sha1); + } + elsif ($mode eq 1) + { + my $digest2 = md5(pack("H*", $buf)); + + $digest = hmac_hex (pack("H*", $salt), $digest2, \&md5); + } + + $digest = substr ($digest, 0, 24); + + my $hash = sprintf ("\$SNMPv3\$0\$%s\$%s\$%s\$%s", $pkt_num, $salt, $engineID, $digest); + + return $hash; +} + +sub module_verify_hash +{ + my $line = shift; + + my $idx = index ($line, ':'); + + return unless $idx >= 0; + + my $hash = substr ($line, 0, $idx); + my $word = substr ($line, $idx + 1); + + return unless length ($word) gt 0; + return unless substr ($hash, 0, 10) eq '$SNMPv3$0$'; + + my (undef, $signature, $version, $pkt_num, $salt, $engineID, $digest) = split '\$', $hash; + + return unless defined $signature; + return unless defined $version; + return unless defined $pkt_num; + return unless defined $salt; + return unless defined $engineID; + return unless defined $digest; + + my $word_packed = pack_if_HEX_notation ($word); + + # gen md5 & sha1 hashes + + my $new_hash_md5 = 
module_generate_hash ($word_packed, $salt, $pkt_num, $engineID, 1); + my $new_hash_sha1 = module_generate_hash ($word_packed, $salt, $pkt_num, $engineID, 2); + + # parse digests + + my (undef, undef, undef, undef, undef, undef, $digest_md5) = split '\$', $new_hash_md5; + my (undef, undef, undef, undef, undef, undef, $digest_sha1) = split '\$', $new_hash_sha1; + + if ($digest eq $digest_md5) + { + return ($new_hash_md5, $word); + } + else + { + return ($new_hash_sha1, $word); + } +} + +1; diff --git a/tools/test_modules/m25100.pm b/tools/test_modules/m25100.pm index 6249df9a8..2335f7f2f 100644 --- a/tools/test_modules/m25100.pm +++ b/tools/test_modules/m25100.pm @@ -11,14 +11,14 @@ use warnings; use Digest::MD5 qw (md5 md5_hex); use Digest::HMAC qw (hmac hmac_hex); -sub module_constraints { [[1, 256], [24, 3000], [-1, -1], [-1, -1], [-1, -1]] } +sub module_constraints { [[8, 256], [24, 3000], [-1, -1], [-1, -1], [-1, -1]] } sub module_generate_hash { my $word = shift; my $salt = shift; - my $pkt_num = shift // int(rand(99999999)); - my $engineID = shift // random_hex_string(6); + my $pkt_num = shift // int(rand(100000000)); + my $engineID = shift // random_hex_string(26, 34); # make even if needed @@ -71,7 +71,7 @@ sub module_verify_hash my $word_packed = pack_if_HEX_notation ($word); - my $new_hash = module_generate_hash ($word_packed, $salt, $pkt_num, $engineID); #, $digest); + my $new_hash = module_generate_hash ($word_packed, $salt, $pkt_num, $engineID); return ($new_hash, $word); } diff --git a/tools/test_modules/m25200.pm b/tools/test_modules/m25200.pm index c44212825..d27908255 100644 --- a/tools/test_modules/m25200.pm +++ b/tools/test_modules/m25200.pm @@ -11,14 +11,14 @@ use warnings; use Digest::SHA qw (sha1 sha1_hex); use Digest::HMAC qw (hmac hmac_hex); -sub module_constraints { [[1, 256], [24, 3000], [-1, -1], [-1, -1], [-1, -1]] } +sub module_constraints { [[8, 256], [24, 3000], [-1, -1], [-1, -1], [-1, -1]] } sub module_generate_hash { my $word = 
shift; my $salt = shift; - my $pkt_num = shift // int(rand(99999999)); - my $engineID = shift // random_hex_string(6); + my $pkt_num = shift // int(rand(100000000)); + my $engineID = shift // random_hex_string(26, 34); # make even if needed