From 540b405e3ac769c182e88eb6c3d115e561d4f06f Mon Sep 17 00:00:00 2001 From: jsteube Date: Sun, 24 Feb 2019 10:12:48 +0100 Subject: [PATCH] Replace IS_ROCM with HAS_VPERM and HAS_VADD3 --- OpenCL/inc_common.cl | 84 +++++++++++++++---------------- OpenCL/inc_rp_optimized.cl | 4 +- OpenCL/inc_types.cl | 8 +-- OpenCL/inc_vendor.cl | 24 ++------- include/opencl.h | 2 +- include/types.h | 3 +- src/opencl.c | 100 +++++++++++++++++-------------------- 7 files changed, 101 insertions(+), 124 deletions(-) diff --git a/OpenCL/inc_common.cl b/OpenCL/inc_common.cl index 8833f3c26..00ceb29b0 100644 --- a/OpenCL/inc_common.cl +++ b/OpenCL/inc_common.cl @@ -417,7 +417,7 @@ DECLSPEC void make_utf16be (const u32x *in, u32x *out1, u32x *out2) out1[1] = hc_byte_perm (in[0], 0, 0x3727); out1[0] = hc_byte_perm (in[0], 0, 0x1707); - #elif defined IS_AMD && AMD_GCN >= 3 + #elif defined IS_AMD && HAS_VPERM out2[3] = hc_byte_perm (in[3], 0, 0x03070207); out2[2] = hc_byte_perm (in[3], 0, 0x01070007); @@ -455,7 +455,7 @@ DECLSPEC void make_utf16beN (const u32x *in, u32x *out1, u32x *out2) out1[1] = hc_byte_perm (in[0], 0, 0x1707); out1[0] = hc_byte_perm (in[0], 0, 0x3727); - #elif defined IS_AMD && AMD_GCN >= 3 + #elif defined IS_AMD && HAS_VPERM out2[3] = hc_byte_perm (in[3], 0, 0x01070007); out2[2] = hc_byte_perm (in[3], 0, 0x03070207); @@ -493,7 +493,7 @@ DECLSPEC void make_utf16le (const u32x *in, u32x *out1, u32x *out2) out1[1] = hc_byte_perm (in[0], 0, 0x7372); out1[0] = hc_byte_perm (in[0], 0, 0x7170); - #elif defined IS_AMD && AMD_GCN >= 3 + #elif defined IS_AMD && HAS_VPERM out2[3] = hc_byte_perm (in[3], 0, 0x07030702); out2[2] = hc_byte_perm (in[3], 0, 0x07010700); @@ -531,7 +531,7 @@ DECLSPEC void make_utf16leN (const u32x *in, u32x *out1, u32x *out2) out1[1] = hc_byte_perm (in[0], 0, 0x7170); out1[0] = hc_byte_perm (in[0], 0, 0x7372); - #elif defined IS_AMD && AMD_GCN >= 3 + #elif defined IS_AMD && HAS_VPERM out2[3] = hc_byte_perm (in[3], 0, 0x07010700); out2[2] = hc_byte_perm (in[3], 0, 0x07030702); @@ -565,7 +565,7 @@ DECLSPEC void undo_utf16be (const u32x *in1, const u32x *in2, u32x *out) out[2] = hc_byte_perm (in2[0], in2[1], 0x4602); out[3] = hc_byte_perm (in2[2], in2[3], 0x4602); - #elif defined IS_AMD && AMD_GCN >= 3 + #elif defined IS_AMD && HAS_VPERM out[0] = hc_byte_perm (in1[0], in1[1], 0x04060002); out[1] = hc_byte_perm (in1[2], in1[3], 0x04060002); @@ -595,7 +595,7 @@ DECLSPEC void undo_utf16le (const u32x *in1, const u32x *in2, u32x *out) out[2] = hc_byte_perm (in2[0], in2[1], 0x6420); out[3] = hc_byte_perm (in2[2], in2[3], 0x6420); - #elif defined IS_AMD && AMD_GCN >= 3 + #elif defined IS_AMD && HAS_VPERM out[0] = hc_byte_perm (in1[0], in1[1], 0x06040200); out[1] = hc_byte_perm (in1[2], in1[3], 0x06040200); @@ -723,7 +723,7 @@ DECLSPEC void switch_buffer_by_offset_le (u32x *w0, u32x *w1, u32x *w2, u32x *w3 const int offset_switch = offset / 4; - #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC + #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC w0[0] = swap32 (w0[0]); w0[1] = swap32 (w0[1]); w0[2] = swap32 (w0[2]); @@ -1082,7 +1082,7 @@ DECLSPEC void switch_buffer_by_offset_le (u32x *w0, u32x *w1, u32x *w2, u32x *w3 w3[3] = swap32 (w3[3]); #endif - #if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV + #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; @@ -2740,7 +2740,7 @@ DECLSPEC void switch_buffer_by_offset_be (u32x *w0, u32x *w1, u32x *w2, u32x *w3 { const int offset_switch = offset / 4; - #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC + #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { @@ -3067,7 +3067,7 @@ DECLSPEC void switch_buffer_by_offset_be (u32x *w0, u32x *w1, u32x *w2, u32x *w3 #endif - #if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV + #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; @@ -3407,7 +3407,7 @@ DECLSPEC void switch_buffer_by_offset_carry_be (u32x *w0, u32x *w1, u32x *w2, u3 { const int offset_switch = offset / 4; - #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC + #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -3868,7 +3868,7 @@ DECLSPEC void switch_buffer_by_offset_carry_be (u32x *w0, u32x *w1, u32x *w2, u3 } #endif - #if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV + #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; @@ -4347,7 +4347,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le (u32x *w0, u32x *w1, u32x *w2, u32x const int offset_switch = offset / 4; - #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC + #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC w0[0] = swap32 (w0[0]); w0[1] = swap32 (w0[1]); w0[2] = swap32 (w0[2]); @@ -5570,7 +5570,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le (u32x *w0, u32x *w1, u32x *w2, u32x w7[3] = swap32 (w7[3]); #endif - #if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV + #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; @@ -6149,7 +6149,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_be (u32x *w0, u32x *w1, u32x *w2, u32x { const int offset_switch = offset / 4; - #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC + #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -7306,7 +7306,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_be (u32x *w0, u32x *w1, u32x *w2, u32x } #endif - #if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV + #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; @@ -8477,7 +8477,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_be (u32x *w0, u32x *w1, u32x *w2 { const int offset_switch = offset / 4; - #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC + #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -10162,7 +10162,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_be (u32x *w0, u32x *w1, u32x *w2 } #endif - #if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV + #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; @@ -11865,7 +11865,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le (u32x *w, const u32 offset) const int offset_switch = offset / 4; - #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC + #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC #pragma unroll for (int i = 0; i < 64; i++) w[i] = swap32 (w[i]); @@ -16230,7 +16230,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le (u32x *w, const u32 offset) #endif - #if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV + #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; @@ -20601,7 +20601,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_be (u32x *w, const u32 offset) { const int offset_switch = offset / 4; - #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC + #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -24958,7 +24958,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_be (u32x *w, const u32 offset) } #endif - #if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV + #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; @@ -31297,7 +31297,7 @@ DECLSPEC void make_utf16be_S (const u32 *in, u32 *out1, u32 *out2) out1[1] = hc_byte_perm_S (in[0], 0, 0x3727); out1[0] = hc_byte_perm_S (in[0], 0, 0x1707); - #elif defined IS_AMD && AMD_GCN >= 3 + #elif defined IS_AMD && HAS_VPERM out2[3] = hc_byte_perm_S (in[3], 0, 0x03070207); out2[2] = hc_byte_perm_S (in[3], 0, 0x01070007); @@ -31335,7 +31335,7 @@ DECLSPEC void make_utf16le_S (const u32 *in, u32 *out1, u32 *out2) out1[1] = hc_byte_perm_S (in[0], 0, 0x7372); out1[0] = hc_byte_perm_S (in[0], 0, 0x7170); - #elif defined IS_AMD && AMD_GCN >= 3 + #elif defined IS_AMD && HAS_VPERM out2[3] = hc_byte_perm_S (in[3], 0, 0x07030702); out2[2] = hc_byte_perm_S (in[3], 0, 0x07010700); @@ -31369,7 +31369,7 @@ DECLSPEC void undo_utf16be_S (const u32 *in1, const u32 *in2, u32 *out) out[2] = hc_byte_perm_S (in2[0], in2[1], 0x4602); out[3] = hc_byte_perm_S (in2[2], in2[3], 0x4602); - #elif defined IS_AMD && AMD_GCN >= 3 + #elif defined IS_AMD && HAS_VPERM out[0] = hc_byte_perm_S (in1[0], in1[1], 0x04060002); out[1] = hc_byte_perm_S (in1[2], in1[3], 0x04060002); @@ -31399,7 +31399,7 @@ DECLSPEC void undo_utf16le_S (const u32 *in1, const u32 *in2, u32 *out) out[2] = hc_byte_perm_S (in2[0], in2[1], 0x6420); out[3] = hc_byte_perm_S (in2[2], in2[3], 0x6420); - #elif defined IS_AMD && AMD_GCN >= 3 + #elif defined IS_AMD && HAS_VPERM out[0] = hc_byte_perm_S (in1[0], in1[1], 0x06040200); out[1] = hc_byte_perm_S (in1[2], in1[3], 0x06040200); @@ -31428,7 +31428,7 @@ DECLSPEC void switch_buffer_by_offset_le_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int offset_switch = offset / 4; - #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC + #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC w0[0] = swap32_S (w0[0]); w0[1] = swap32_S (w0[1]); w0[2] = swap32_S (w0[2]); @@ -31787,7 +31787,7 @@ DECLSPEC void switch_buffer_by_offset_le_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3, w3[3] = swap32_S (w3[3]); #endif - #if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV + #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; @@ -33444,7 +33444,7 @@ DECLSPEC void switch_buffer_by_offset_be_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3, { const int offset_switch = offset / 4; - #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC + #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -33769,7 +33769,7 @@ DECLSPEC void switch_buffer_by_offset_be_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3, } #endif - #if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV + #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; @@ -34108,7 +34108,7 @@ DECLSPEC void switch_buffer_by_offset_carry_be_S (u32 *w0, u32 *w1, u32 *w2, u32 { const int offset_switch = offset / 4; - #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC + #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -34569,7 +34569,7 @@ DECLSPEC void switch_buffer_by_offset_carry_be_S (u32 *w0, u32 *w1, u32 *w2, u32 } #endif - #if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV + #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; @@ -35048,7 +35048,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le_S (u32 *w0, u32 *w1, u32 *w2, u32 * const int offset_switch = offset / 4; - #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC + #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC w0[0] = swap32_S (w0[0]); w0[1] = swap32_S (w0[1]); w0[2] = swap32_S (w0[2]); @@ -36271,7 +36271,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le_S (u32 *w0, u32 *w1, u32 *w2, u32 * w7[3] = swap32_S (w7[3]); #endif - #if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV + #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; @@ -36850,7 +36850,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_be_S (u32 *w0, u32 *w1, u32 *w2, u32 * { const int offset_switch = offset / 4; - #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC + #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -38007,7 +38007,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_be_S (u32 *w0, u32 *w1, u32 *w2, u32 * } #endif - #if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV + #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; @@ -39178,7 +39178,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_be_S (u32 *w0, u32 *w1, u32 *w2, { const int offset_switch = offset / 4; - #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC + #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -40863,7 +40863,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_be_S (u32 *w0, u32 *w1, u32 *w2, } #endif - #if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV + #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; @@ -42566,7 +42566,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le_S (u32 *w, const u32 offset) const int offset_switch = offset / 4; - #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC + #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC #pragma unroll for (int i = 0; i < 64; i++) w[i] = swap32_S (w[i]); @@ -46931,7 +46931,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le_S (u32 *w, const u32 offset) #endif - #if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV + #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; @@ -51302,7 +51302,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_be_S (u32 *w, const u32 offset) { const int offset_switch = offset / 4; - #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC + #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -55659,7 +55659,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_be_S (u32 *w, const u32 offset) } #endif - #if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV + #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; diff --git a/OpenCL/inc_rp_optimized.cl b/OpenCL/inc_rp_optimized.cl index e5c15daf5..a097836f5 100644 --- a/OpenCL/inc_rp_optimized.cl +++ b/OpenCL/inc_rp_optimized.cl @@ -786,7 +786,7 @@ void append_block8 (const u32 offset, u32 *buf0, u32 *buf1, const u32 *src_l0, c const int offset_switch = offset / 4; #endif - #if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC + #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC const u32 src_r00 = swap32_S (src_r0[0]); const u32 src_r01 = swap32_S (src_r0[1]); const u32 src_r02 = swap32_S (src_r0[2]); @@ -898,7 +898,7 @@ void append_block8 (const u32 offset, u32 *buf0, u32 *buf1, const u32 *src_l0, c s7 = swap32_S (s7); #endif - #if (defined IS_AMD && AMD_GCN >= 3) || defined IS_NV + #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; diff --git a/OpenCL/inc_types.cl b/OpenCL/inc_types.cl index bce0ba1f4..02ba0d84c 100644 --- a/OpenCL/inc_types.cl +++ b/OpenCL/inc_types.cl @@ -175,7 +175,7 @@ DECLSPEC u64x hl32_to_64 (const u32x a, const u32x b) #ifdef IS_AMD -#if AMD_GCN >= 3 +#if HAS_VPERM DECLSPEC u32 swap32_S (const u32 v) { u32 r; @@ -240,7 +240,7 @@ DECLSPEC u64 rotl64_S (const u64 a, const u32 n) return rotr64_S (a, 64 - n); } -#if AMD_GCN >= 3 +#if HAS_VPERM DECLSPEC u32x swap32 (const u32x v) { return bitselect (rotate (v, 24u), rotate (v, 8u), 0x00ff00ffu); @@ -371,7 +371,7 @@ DECLSPEC u32 hc_bytealign_S (const u32 a, const u32 b, const u32 c) return amd_bytealign (a, b, c); } -#if AMD_GCN >= 3 +#if HAS_VPERM DECLSPEC u32x hc_byte_perm (const u32x a, const u32x b, const u32x c) { u32x r; @@ -435,7 +435,7 @@ DECLSPEC u32 hc_byte_perm_S (const u32 a, const u32 b, const u32 c) } #endif -#if AMD_GCN >= 5 +#if HAS_VADD3 DECLSPEC u32x hc_add3 (const u32x a, const u32x b, const u32x c) { u32x r; diff --git a/OpenCL/inc_vendor.cl b/OpenCL/inc_vendor.cl index 4f3ef1a22..18fce9a16 100644 --- a/OpenCL/inc_vendor.cl +++ b/OpenCL/inc_vendor.cl @@ -25,26 +25,6 @@ #if VENDOR_ID == (1 << 0) #define IS_AMD -#define AMD_GCN 0 -#if AMD_ROCM == 1 -#if defined __gfx600__ || defined __gfx601__ -#undef AMD_GCN -#define AMD_GCN 1 -#endif -#if defined __gfx700__ || defined __gfx701__ || defined __gfx702__ || defined __gfx703__ -#undef AMD_GCN -#define AMD_GCN 2 -#endif -#if defined __gfx800__ || defined __gfx801__ || defined __gfx802__ || defined __gfx803__ || defined __gfx804__ || defined __gfx810__ -#undef AMD_GCN -#define AMD_GCN 3 -// According to AMD docs, GCN 3 and 4 are the same -#endif -#if defined __gfx900__ || defined __gfx901__ || defined __gfx902__ || defined __gfx903__ -#undef AMD_GCN -#define AMD_GCN 5 -#endif -#endif #elif VENDOR_ID == (1 << 1) #define IS_APPLE #define IS_GENERIC @@ -95,7 +75,9 @@ #define DECLSPEC #endif -#if (defined IS_AMD && AMD_GCN < 3) +// HAS_VPERM indicated ROCM + +#if (defined IS_AMD && HAS_VPERM == 0) #define MAYBE_VOLATILE volatile #else #define MAYBE_VOLATILE diff --git a/include/opencl.h b/include/opencl.h index 06eb71fd2..56f76dd1b 100644 --- a/include/opencl.h +++ b/include/opencl.h @@ -30,7 +30,7 @@ int hc_clCreateCommandQueue (hashcat_ctx_t *hashcat_ctx, cl_context context int hc_clCreateContext (hashcat_ctx_t *hashcat_ctx, cl_context_properties *properties, cl_uint num_devices, const cl_device_id *devices, void (CL_CALLBACK *pfn_notify) (const char *, const void *, size_t, void *), void *user_data, cl_context *context); int hc_clCreateKernel (hashcat_ctx_t *hashcat_ctx, cl_program program, const char *kernel_name, cl_kernel *kernel); int hc_clCreateProgramWithBinary (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_uint num_devices, const cl_device_id *device_list, const size_t *lengths, unsigned char **binaries, cl_int *binary_status, cl_program *program); -int hc_clCreateProgramWithSource (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_uint count, char **strings, const size_t *lengths, cl_program *program); +int hc_clCreateProgramWithSource (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_program *program); int hc_clEnqueueCopyBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t cb, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event); int hc_clEnqueueMapBuffer (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map, cl_map_flags map_flags, size_t offset, size_t cb, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event, void **buf); int hc_clEnqueueNDRangeKernel (hashcat_ctx_t *hashcat_ctx, cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event); diff --git a/include/types.h b/include/types.h index 9603c6111..ce4a0068f 100644 --- a/include/types.h +++ b/include/types.h @@ -1258,7 +1258,8 @@ typedef struct hc_device_param char *driver_version; char *device_opencl_version; - bool is_rocm; + bool has_vperm; + bool has_vadd3; double spin_damp; diff --git a/src/opencl.c b/src/opencl.c index 491669362..5d1d8bef8 100644 --- a/src/opencl.c +++ b/src/opencl.c @@ -333,6 +333,31 @@ static bool write_kernel_binary (hashcat_ctx_t *hashcat_ctx, char *kernel_file, return true; } +static bool test_instruction (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const char *kernel_buf) +{ + int CL_rc; + + cl_program program; + + CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->context, 1, &kernel_buf, NULL, &program); + + if (CL_rc == -1) return false; + + opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx; + + OCL_PTR *ocl = opencl_ctx->ocl; + + CL_rc = ocl->clBuildProgram (program, 1, &device_param->device, NULL, NULL, NULL); // do not use the wrapper to avoid the error message + + const bool r = (CL_rc == CL_SUCCESS) ? true : false; + + CL_rc = hc_clReleaseProgram (hashcat_ctx, program); + + if (CL_rc == -1) return false; + + return r; +} + void generate_source_kernel_filename (const bool slow_candidates, const u32 attack_exec, const u32 attack_kern, const u32 kern_type, const u32 opti_type, char *shared_dir, char *source_file) { if (opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) @@ -859,7 +884,7 @@ int hc_clCreateBuffer (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_mem_fl return 0; } -int hc_clCreateProgramWithSource (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_uint count, char **strings, const size_t *lengths, cl_program *program) +int hc_clCreateProgramWithSource (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_program *program) { opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx; @@ -867,7 +892,7 @@ int hc_clCreateProgramWithSource (hashcat_ctx_t *hashcat_ctx, cl_context context cl_int CL_err; - *program = ocl->clCreateProgramWithSource (context, count, (const char **) strings, lengths, &CL_err); + *program = ocl->clCreateProgramWithSource (context, count, strings, lengths, &CL_err); if (CL_err != CL_SUCCESS) { @@ -3811,25 +3836,15 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) bool amd_warn = true; #if defined (__linux__) - if (device_param->is_rocm == false) - { - // ROCm is so much better, we should give the user some hint and remove this block - - // AMDGPU-PRO Driver 16.40 and higher - if (strtoul (device_param->driver_version, NULL, 10) >= 2117) amd_warn = false; - // AMDGPU-PRO Driver 16.50 is known to be broken - if (strtoul (device_param->driver_version, NULL, 10) == 2236) amd_warn = true; - // AMDGPU-PRO Driver 16.60 is known to be broken - if (strtoul (device_param->driver_version, NULL, 10) == 2264) amd_warn = true; - // AMDGPU-PRO Driver 17.10 is known to be broken - if (strtoul (device_param->driver_version, NULL, 10) == 2348) amd_warn = true; - // AMDGPU-PRO Driver 17.20 (2416) is fine, doesn't need check will match >= 2117 - } - else - { - // Support for ROCm platform - if (strtof (device_param->driver_version, NULL) >= 1.1f) amd_warn = false; - } + // AMDGPU-PRO Driver 16.40 and higher + if (strtoul (device_param->driver_version, NULL, 10) >= 2117) amd_warn = false; + // AMDGPU-PRO Driver 16.50 is known to be broken + if (strtoul (device_param->driver_version, NULL, 10) == 2236) amd_warn = true; + // AMDGPU-PRO Driver 16.60 is known to be broken + if (strtoul (device_param->driver_version, NULL, 10) == 2264) amd_warn = true; + // AMDGPU-PRO Driver 17.10 is known to be broken + if (strtoul (device_param->driver_version, NULL, 10) == 2348) amd_warn = true; + // AMDGPU-PRO Driver 17.20 (2416) is fine, doesn't need check will match >= 2117 #elif defined (_WIN) // AMD Radeon Software 14.9 and higher, should be updated to 15.12 if (strtoul (device_param->driver_version, NULL, 10) >= 1573) amd_warn = false; @@ -4592,34 +4607,13 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) if ((device_param->device_type & CL_DEVICE_TYPE_GPU) && (device_param->platform_vendor_id == VENDOR_ID_AMD)) { - char *kernel_buf = "__kernel void test (__global int *p) { __asm__ (\"DS_NOP\"); }"; - - const size_t kernel_len = strlen (kernel_buf); - - cl_program program; - - CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->context, 1, &kernel_buf, &kernel_len, &program); - - if (CL_rc == -1) return -1; - - opencl_ctx_t *opencl_ctx = hashcat_ctx->opencl_ctx; + const bool has_vperm = test_instruction (hashcat_ctx, device_param, "__kernel void test () { uint r; __asm__ (\"V_PERM_B32 %0, 0, 0, 0;\" : \"=v\"(r)); }"); - OCL_PTR *ocl = opencl_ctx->ocl; + device_param->has_vperm = has_vperm; - CL_rc = ocl->clBuildProgram (program, 1, &device_param->device, NULL, NULL, NULL); + const bool has_vadd3 = test_instruction (hashcat_ctx, device_param, "__kernel void test () { uint r; __asm__ (\"V_ADD3_U32 %0, 0, 0, 0;\" : \"=v\"(r)); }"); - if (CL_rc == CL_SUCCESS) - { - device_param->is_rocm = true; - } - else - { - device_param->is_rocm = false; - } - - CL_rc = hc_clReleaseProgram (hashcat_ctx, program); - - if (CL_rc == -1) return -1; + device_param->has_vadd3 = has_vadd3; } // device_available_mem @@ -4629,7 +4623,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) device_param->device_available_mem = device_param->device_global_mem - MAX_ALLOC_CHECKS_SIZE; - if ((device_param->device_type & CL_DEVICE_TYPE_GPU) && ((device_param->platform_vendor_id == VENDOR_ID_NV) || ((device_param->platform_vendor_id == VENDOR_ID_AMD) && (device_param->is_rocm == false)))) + if ((device_param->device_type & CL_DEVICE_TYPE_GPU) && ((device_param->platform_vendor_id == VENDOR_ID_NV) || (device_param->platform_vendor_id == VENDOR_ID_AMD))) { // OK, so the problem here is the following: // There's just CL_DEVICE_GLOBAL_MEM_SIZE to ask OpenCL about the total memory on the device, @@ -4875,9 +4869,9 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) char build_opts[2048] = { 0 }; #if defined (DEBUG) - snprintf (build_opts, sizeof (build_opts), "%s -D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D AMD_ROCM=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll", build_opts_base, device_param->device_local_mem_type, device_param->platform_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->is_rocm, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type); + snprintf (build_opts, sizeof (build_opts), "%s -D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll", build_opts_base, device_param->device_local_mem_type, device_param->platform_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->has_vperm, device_param->has_vadd3, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type); #else - snprintf (build_opts, sizeof (build_opts), "%s -D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D AMD_ROCM=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll -w", build_opts_base, device_param->device_local_mem_type, device_param->platform_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->is_rocm, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type); + snprintf (build_opts, sizeof (build_opts), "%s -D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll -w", build_opts_base, device_param->device_local_mem_type, device_param->platform_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->has_vperm, device_param->has_vadd3, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type); #endif /* @@ -4986,7 +4980,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) if (rc_read_kernel == false) return -1; - CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->context, 1, kernel_sources, NULL, &device_param->program); + CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->context, 1, (const char **) kernel_sources, NULL, &device_param->program); if (CL_rc == -1) return -1; @@ -5065,7 +5059,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) if (rc_read_kernel == false) return -1; - CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->context, 1, kernel_sources, NULL, &device_param->program); + CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->context, 1, (const char **) kernel_sources, NULL, &device_param->program); if (CL_rc == -1) return -1; @@ -5194,7 +5188,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) if (rc_read_kernel == false) return -1; - CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->context, 1, kernel_sources, NULL, &device_param->program_mp); + CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->context, 1, (const char **) kernel_sources, NULL, &device_param->program_mp); if (CL_rc == -1) return -1; @@ -5341,7 +5335,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) if (rc_read_kernel == false) return -1; - CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->context, 1, kernel_sources, NULL, &device_param->program_amp); + CL_rc = hc_clCreateProgramWithSource (hashcat_ctx, device_param->context, 1, (const char **) kernel_sources, NULL, &device_param->program_amp); if (CL_rc == -1) return -1;