From cf512faa53f3641c79e702f74e24f29ff26db092 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Wed, 14 Jul 2021 17:06:20 +0200 Subject: [PATCH] Update large switch() cases in inc_common.cl and some inline assembly common functions for devices managed with HIP backend --- OpenCL/inc_common.cl | 174 +++++++++++++++++----------------- OpenCL/inc_ecc_secp256k1.cl | 4 +- OpenCL/inc_rp_optimized.cl | 18 +--- OpenCL/inc_vendor.h | 2 +- OpenCL/m00500-optimized.cl | 6 +- OpenCL/m01500_a3-pure.cl | 4 +- OpenCL/m01600-optimized.cl | 6 +- OpenCL/m03000_a3-pure.cl | 4 +- OpenCL/m05800-optimized.cl | 2 +- OpenCL/m06300-optimized.cl | 6 +- OpenCL/m07400-optimized.cl | 14 +-- OpenCL/m07700_a0-optimized.cl | 4 +- OpenCL/m07700_a1-optimized.cl | 5 - OpenCL/m07700_a3-optimized.cl | 5 - OpenCL/m07701_a0-optimized.cl | 5 - OpenCL/m07701_a1-optimized.cl | 5 - OpenCL/m07701_a3-optimized.cl | 5 - OpenCL/m10700-optimized.cl | 4 +- OpenCL/m11600-pure.cl | 2 +- OpenCL/m12500-pure.cl | 2 +- OpenCL/m13800_a0-optimized.cl | 2 +- OpenCL/m13800_a1-optimized.cl | 2 +- OpenCL/m13800_a3-optimized.cl | 2 +- OpenCL/m14000_a3-pure.cl | 4 +- OpenCL/m23700-pure.cl | 2 +- OpenCL/m23800-pure.cl | 2 +- src/backend.c | 22 ++--- 27 files changed, 139 insertions(+), 174 deletions(-) diff --git a/OpenCL/inc_common.cl b/OpenCL/inc_common.cl index 26df19a2b..82b50b7c8 100644 --- a/OpenCL/inc_common.cl +++ b/OpenCL/inc_common.cl @@ -528,7 +528,7 @@ DECLSPEC u32x unpack_v8a_from_v32 (const u32x v32) asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.sf) : "r"(v32.sf)); #endif - //#elif defined IS_AMD && HAS_VBFE == 1 + //#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1 //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 0, 8;" : "=v"(r) : "v"(v32)); #else r = (v32 >> 0) & 0xff; @@ -575,7 +575,7 @@ DECLSPEC u32x unpack_v8b_from_v32 (const u32x v32) asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.sf) : "r"(v32.sf)); #endif - //#elif defined IS_AMD && HAS_VBFE == 1 + //#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1 //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 8, 8;" : "=v"(r) : "v"(v32)); #else r = (v32 >> 8) & 0xff; @@ -622,7 +622,7 @@ DECLSPEC u32x unpack_v8c_from_v32 (const u32x v32) asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.sf) : "r"(v32.sf)); #endif - //#elif defined IS_AMD && HAS_VBFE == 1 + //#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1 //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 16, 8;" : "=v"(r) : "v"(v32)); #else r = (v32 >> 16) & 0xff; @@ -669,7 +669,7 @@ DECLSPEC u32x unpack_v8d_from_v32 (const u32x v32) asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.sf) : "r"(v32.sf)); #endif - //#elif defined IS_AMD && HAS_VBFE == 1 + //#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1 //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 24, 8;" : "=v"(r) : "v"(v32)); #else r = (v32 >> 24) & 0xff; @@ -684,7 +684,7 @@ DECLSPEC u32 unpack_v8a_from_v32_S (const u32 v32) #if defined IS_NV && HAS_BFE == 1 asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r) : "r"(v32)); - //#elif defined IS_AMD && HAS_VBFE == 1 + //#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1 //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 0, 8;" : "=v"(r) : "v"(v32)); #else r = (v32 >> 0) & 0xff; @@ -699,7 +699,7 @@ DECLSPEC u32 unpack_v8b_from_v32_S (const u32 v32) #if defined IS_NV && HAS_BFE == 1 asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r) : "r"(v32)); - //#elif defined IS_AMD && HAS_VBFE == 1 + //#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1 //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 8, 8;" : "=v"(r) : "v"(v32)); #else r = (v32 >> 8) & 0xff; @@ -714,7 +714,7 @@ DECLSPEC u32 unpack_v8c_from_v32_S (const u32 v32) #if defined IS_NV && HAS_BFE == 1 asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r) : "r"(v32)); - //#elif defined IS_AMD && HAS_VBFE == 1 + //#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1 //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 16, 8;" : "=v"(r) : "v"(v32)); #else r = (v32 >> 16) & 0xff; @@ -729,7 +729,7 @@ DECLSPEC u32 unpack_v8d_from_v32_S (const u32 v32) #if defined IS_NV && HAS_BFE == 1 asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r) : "r"(v32)); - //#elif defined IS_AMD && HAS_VBFE == 1 + //#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1 //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 24, 8;" : "=v"(r) : "v"(v32)); #else r = (v32 >> 24) & 0xff; @@ -939,9 +939,9 @@ DECLSPEC u64x hc_rotl64 (const u64x a, const int n) { #if defined _CPU_OPENCL_EMU_H return rotl64 (a, n); - #elif defined IS_CUDA || defined IS_HIP + #elif defined IS_CUDA return rotl64 (a, n); - #elif defined IS_AMD + #elif (defined IS_AMD || defined IS_HIP) return rotl64 (a, n); #else #ifdef USE_ROTATE @@ -956,9 +956,9 @@ DECLSPEC u64x hc_rotr64 (const u64x a, const int n) { #if defined _CPU_OPENCL_EMU_H return rotr64 (a, n); - #elif defined IS_CUDA || defined IS_HIP + #elif defined IS_CUDA return rotr64 (a, n); - #elif defined IS_AMD + #elif (defined IS_AMD || defined IS_HIP) return rotr64 (a, n); #else #ifdef USE_ROTATE @@ -973,9 +973,9 @@ DECLSPEC u64 hc_rotl64_S (const u64 a, const int n) { #if defined _CPU_OPENCL_EMU_H return rotl64 (a, n); - #elif defined IS_CUDA || defined IS_HIP + #elif defined IS_CUDA return rotl64_S (a, n); - #elif defined IS_AMD + #elif (defined IS_AMD || defined IS_HIP) return rotl64_S (a, n); #else #ifdef USE_ROTATE @@ -990,9 +990,9 @@ DECLSPEC u64 hc_rotr64_S (const u64 a, const int n) { #if defined _CPU_OPENCL_EMU_H return rotr64 (a, n); - #elif defined IS_CUDA || defined IS_HIP + #elif defined IS_CUDA return rotr64_S (a, n); - #elif defined IS_AMD + #elif (defined IS_AMD || defined IS_HIP) return rotr64_S (a, n); #else #ifdef USE_ROTATE @@ -1012,7 +1012,7 @@ DECLSPEC u32x hc_swap32 (const u32x v) #ifdef _CPU_OPENCL_EMU_H r = byte_swap_32 (v); #else - #if defined IS_AMD && HAS_VPERM == 1 + #if (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 const u32 m = 0x00010203; @@ -1109,7 +1109,7 @@ DECLSPEC u32 hc_swap32_S (const u32 v) #ifdef _CPU_OPENCL_EMU_H r = byte_swap_32 (v); #else - #if defined IS_AMD && HAS_VPERM == 1 + #if (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 __asm__ __volatile__ ("V_PERM_B32 %0, 0, %1, %2;" : "=v"(r) : "v"(v), "v"(0x00010203)); #elif defined IS_NV && HAS_PRMT == 1 asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r) : "r"(v)); @@ -1135,7 +1135,7 @@ DECLSPEC u64x hc_swap64 (const u64x v) #ifdef _CPU_OPENCL_EMU_H r = byte_swap_64 (v); #else - #if defined IS_AMD && HAS_VPERM == 1 + #if (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 const u32 m = 0x00010203; @@ -1354,7 +1354,7 @@ DECLSPEC u64 hc_swap64_S (const u64 v) #ifdef _CPU_OPENCL_EMU_H r = byte_swap_64 (v); #else - #if defined IS_AMD && HAS_VPERM == 1 + #if (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 const u32 m = 0x00010203; const u32 v0 = h32_from_64_S (v); @@ -1399,7 +1399,7 @@ DECLSPEC u64 hc_swap64_S (const u64 v) return r; } -#ifdef IS_AMD +#if (defined IS_AMD || defined IS_HIP) DECLSPEC u32x hc_bfe (const u32x a, const u32x b, const u32x c) { @@ -2767,7 +2767,7 @@ DECLSPEC void make_utf16be (const u32x *in, u32x *out1, u32x *out2) out1[1] = hc_byte_perm (in[0], 0, 0x3727); out1[0] = hc_byte_perm (in[0], 0, 0x1707); - #elif defined IS_AMD && HAS_VPERM + #elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM out2[3] = hc_byte_perm (in[3], 0, 0x03070207); out2[2] = hc_byte_perm (in[3], 0, 0x01070007); @@ -2805,7 +2805,7 @@ DECLSPEC void make_utf16beN (const u32x *in, u32x *out1, u32x *out2) out1[1] = hc_byte_perm (in[0], 0, 0x1707); out1[0] = hc_byte_perm (in[0], 0, 0x3727); - #elif defined IS_AMD && HAS_VPERM + #elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM out2[3] = hc_byte_perm (in[3], 0, 0x01070007); out2[2] = hc_byte_perm (in[3], 0, 0x03070207); @@ -2843,7 +2843,7 @@ DECLSPEC void make_utf16le (const u32x *in, u32x *out1, u32x *out2) out1[1] = hc_byte_perm (in[0], 0, 0x7372); out1[0] = hc_byte_perm (in[0], 0, 0x7170); - #elif defined IS_AMD && HAS_VPERM + #elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM out2[3] = hc_byte_perm (in[3], 0, 0x07030702); out2[2] = hc_byte_perm (in[3], 0, 0x07010700); @@ -2881,7 +2881,7 @@ DECLSPEC void make_utf16leN (const u32x *in, u32x *out1, u32x *out2) out1[1] = hc_byte_perm (in[0], 0, 0x7170); out1[0] = hc_byte_perm (in[0], 0, 0x7372); - #elif defined IS_AMD && HAS_VPERM + #elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM out2[3] = hc_byte_perm (in[3], 0, 0x07010700); out2[2] = hc_byte_perm (in[3], 0, 0x07030702); @@ -2915,7 +2915,7 @@ DECLSPEC void undo_utf16be (const u32x *in1, const u32x *in2, u32x *out) out[2] = hc_byte_perm (in2[0], in2[1], 0x4602); out[3] = hc_byte_perm (in2[2], in2[3], 0x4602); - #elif defined IS_AMD && HAS_VPERM + #elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM out[0] = hc_byte_perm (in1[0], in1[1], 0x04060002); out[1] = hc_byte_perm (in1[2], in1[3], 0x04060002); @@ -2945,7 +2945,7 @@ DECLSPEC void undo_utf16le (const u32x *in1, const u32x *in2, u32x *out) out[2] = hc_byte_perm (in2[0], in2[1], 0x6420); out[3] = hc_byte_perm (in2[2], in2[3], 0x6420); - #elif defined IS_AMD && HAS_VPERM + #elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM out[0] = hc_byte_perm (in1[0], in1[1], 0x06040200); out[1] = hc_byte_perm (in1[2], in1[3], 0x06040200); @@ -3069,7 +3069,7 @@ DECLSPEC void switch_buffer_by_offset_le (u32x *w0, u32x *w1, u32x *w2, u32x *w3 { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -3394,7 +3394,7 @@ DECLSPEC void switch_buffer_by_offset_le (u32x *w0, u32x *w1, u32x *w2, u32x *w3 } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; @@ -3404,7 +3404,7 @@ DECLSPEC void switch_buffer_by_offset_le (u32x *w0, u32x *w1, u32x *w2, u32x *w3 const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); #endif @@ -3737,7 +3737,7 @@ DECLSPEC void switch_buffer_by_offset_carry_le (u32x *w0, u32x *w1, u32x *w2, u3 { const int offset_switch = offset / 4; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -4665,7 +4665,7 @@ DECLSPEC void switch_buffer_by_offset_be (u32x *w0, u32x *w1, u32x *w2, u32x *w3 { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -4990,13 +4990,13 @@ DECLSPEC void switch_buffer_by_offset_be (u32x *w0, u32x *w1, u32x *w2, u32x *w3 } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); #endif @@ -5329,7 +5329,7 @@ DECLSPEC void switch_buffer_by_offset_carry_be (u32x *w0, u32x *w1, u32x *w2, u3 { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -5790,13 +5790,13 @@ DECLSPEC void switch_buffer_by_offset_carry_be (u32x *w0, u32x *w1, u32x *w2, u3 } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); #endif @@ -6265,7 +6265,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le (u32x *w0, u32x *w1, u32x *w2, u32x { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -7422,7 +7422,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le (u32x *w0, u32x *w1, u32x *w2, u32x } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; @@ -7432,7 +7432,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le (u32x *w0, u32x *w1, u32x *w2, u32x const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); #endif @@ -8005,7 +8005,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le (u32x *w0, u32x *w1, u32x *w2 { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -9690,7 +9690,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le (u32x *w0, u32x *w1, u32x *w2 } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; @@ -9700,7 +9700,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le (u32x *w0, u32x *w1, u32x *w2 const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); #endif @@ -11393,7 +11393,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_be (u32x *w0, u32x *w1, u32x *w2, u32x { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -12550,13 +12550,13 @@ DECLSPEC void switch_buffer_by_offset_8x4_be (u32x *w0, u32x *w1, u32x *w2, u32x } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); #endif @@ -13721,7 +13721,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_be (u32x *w0, u32x *w1, u32x *w2 { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -15406,13 +15406,13 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_be (u32x *w0, u32x *w1, u32x *w2 } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); #endif @@ -17105,7 +17105,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le (u32x *w, const u32 offset) { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -21462,7 +21462,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le (u32x *w, const u32 offset) } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; @@ -21472,7 +21472,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le (u32x *w, const u32 offset) const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); #endif @@ -25837,7 +25837,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_be (u32x *w, const u32 offset) { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -30194,13 +30194,13 @@ DECLSPEC void switch_buffer_by_offset_1x64_be (u32x *w, const u32 offset) } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); #endif @@ -36533,7 +36533,7 @@ DECLSPEC void make_utf16be_S (const u32 *in, u32 *out1, u32 *out2) out1[1] = hc_byte_perm_S (in[0], 0, 0x3727); out1[0] = hc_byte_perm_S (in[0], 0, 0x1707); - #elif defined IS_AMD && HAS_VPERM + #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM out2[3] = hc_byte_perm_S (in[3], 0, 0x03070207); out2[2] = hc_byte_perm_S (in[3], 0, 0x01070007); @@ -36571,7 +36571,7 @@ DECLSPEC void make_utf16le_S (const u32 *in, u32 *out1, u32 *out2) out1[1] = hc_byte_perm_S (in[0], 0, 0x7372); out1[0] = hc_byte_perm_S (in[0], 0, 0x7170); - #elif defined IS_AMD && HAS_VPERM + #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM out2[3] = hc_byte_perm_S (in[3], 0, 0x07030702); out2[2] = hc_byte_perm_S (in[3], 0, 0x07010700); @@ -36605,7 +36605,7 @@ DECLSPEC void undo_utf16be_S (const u32 *in1, const u32 *in2, u32 *out) out[2] = hc_byte_perm_S (in2[0], in2[1], 0x4602); out[3] = hc_byte_perm_S (in2[2], in2[3], 0x4602); - #elif defined IS_AMD && HAS_VPERM + #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM out[0] = hc_byte_perm_S (in1[0], in1[1], 0x04060002); out[1] = hc_byte_perm_S (in1[2], in1[3], 0x04060002); @@ -36635,7 +36635,7 @@ DECLSPEC void undo_utf16le_S (const u32 *in1, const u32 *in2, u32 *out) out[2] = hc_byte_perm_S (in2[0], in2[1], 0x6420); out[3] = hc_byte_perm_S (in2[2], in2[3], 0x6420); - #elif defined IS_AMD && HAS_VPERM + #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM out[0] = hc_byte_perm_S (in1[0], in1[1], 0x06040200); out[1] = hc_byte_perm_S (in1[2], in1[3], 0x06040200); @@ -36660,7 +36660,7 @@ DECLSPEC void switch_buffer_by_offset_le_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3, { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -36985,7 +36985,7 @@ DECLSPEC void switch_buffer_by_offset_le_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3, } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; @@ -36995,7 +36995,7 @@ DECLSPEC void switch_buffer_by_offset_le_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); #endif @@ -37328,7 +37328,7 @@ DECLSPEC void switch_buffer_by_offset_carry_le_S (u32 *w0, u32 *w1, u32 *w2, u32 { const int offset_switch = offset / 4; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -38256,7 +38256,7 @@ DECLSPEC void switch_buffer_by_offset_be_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3, { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -38581,13 +38581,13 @@ DECLSPEC void switch_buffer_by_offset_be_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3, } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); #endif @@ -38920,7 +38920,7 @@ DECLSPEC void switch_buffer_by_offset_carry_be_S (u32 *w0, u32 *w1, u32 *w2, u32 { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -39381,13 +39381,13 @@ DECLSPEC void switch_buffer_by_offset_carry_be_S (u32 *w0, u32 *w1, u32 *w2, u32 } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); #endif @@ -39856,7 +39856,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le_S (u32 *w0, u32 *w1, u32 *w2, u32 * { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -41013,7 +41013,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le_S (u32 *w0, u32 *w1, u32 *w2, u32 * } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; @@ -41023,7 +41023,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le_S (u32 *w0, u32 *w1, u32 *w2, u32 * const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); #endif @@ -41596,7 +41596,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le_S (u32 *w0, u32 *w1, u32 *w2, { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -43281,7 +43281,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le_S (u32 *w0, u32 *w1, u32 *w2, } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; @@ -43291,7 +43291,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le_S (u32 *w0, u32 *w1, u32 *w2, const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); #endif @@ -44984,7 +44984,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_be_S (u32 *w0, u32 *w1, u32 *w2, u32 * { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -46141,13 +46141,13 @@ DECLSPEC void switch_buffer_by_offset_8x4_be_S (u32 *w0, u32 *w1, u32 *w2, u32 * } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); #endif @@ -47312,7 +47312,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_be_S (u32 *w0, u32 *w1, u32 *w2, { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -48997,13 +48997,13 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_be_S (u32 *w0, u32 *w1, u32 *w2, } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); #endif @@ -50696,7 +50696,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le_S (u32 *w, const u32 offset) { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -55053,7 +55053,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le_S (u32 *w, const u32 offset) } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; @@ -55063,7 +55063,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le_S (u32 *w, const u32 offset) const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); #endif @@ -59428,7 +59428,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_be_S (u32 *w, const u32 offset) { const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -63785,13 +63785,13 @@ DECLSPEC void switch_buffer_by_offset_1x64_be_S (u32 *w, const u32 offset) } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); #endif diff --git a/OpenCL/inc_ecc_secp256k1.cl b/OpenCL/inc_ecc_secp256k1.cl index e21f528d6..b3a70df78 100644 --- a/OpenCL/inc_ecc_secp256k1.cl +++ b/OpenCL/inc_ecc_secp256k1.cl @@ -124,7 +124,7 @@ DECLSPEC u32 sub (u32 *r, const u32 *a, const u32 *b) : "r"(a[0]), "r"(a[1]), "r"(a[2]), "r"(a[3]), "r"(a[4]), "r"(a[5]), "r"(a[6]), "r"(a[7]), "r"(b[0]), "r"(b[1]), "r"(b[2]), "r"(b[3]), "r"(b[4]), "r"(b[5]), "r"(b[6]), "r"(b[7]) ); - #elif defined IS_AMD && HAS_VSUB == 1 && HAS_VSUBB == 1 + #elif (defined IS_AMD || defined IS_HIP) && HAS_VSUB == 1 && HAS_VSUBB == 1 __asm__ __volatile__ ( "V_SUB_U32 %0, %9, %17;" @@ -176,7 +176,7 @@ DECLSPEC u32 add (u32 *r, const u32 *a, const u32 *b) : "r"(a[0]), "r"(a[1]), "r"(a[2]), "r"(a[3]), "r"(a[4]), "r"(a[5]), "r"(a[6]), "r"(a[7]), "r"(b[0]), "r"(b[1]), "r"(b[2]), "r"(b[3]), "r"(b[4]), "r"(b[5]), "r"(b[6]), "r"(b[7]) ); - #elif defined IS_AMD && HAS_VADD == 1 && HAS_VADDC == 1 + #elif (defined IS_AMD || defined IS_HIP) && HAS_VADD == 1 && HAS_VADDC == 1 __asm__ __volatile__ ( "V_ADD_U32 %0, %9, %17;" diff --git a/OpenCL/inc_rp_optimized.cl b/OpenCL/inc_rp_optimized.cl index dc3754907..026198f09 100644 --- a/OpenCL/inc_rp_optimized.cl +++ b/OpenCL/inc_rp_optimized.cl @@ -781,7 +781,7 @@ DECLSPEC void append_block8_optimized (const u32 offset, u32 *buf0, u32 *buf1, c const int offset_switch = offset / 4; - #if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC const u32 src_r00 = src_r0[0]; const u32 src_r01 = src_r0[1]; const u32 src_r02 = src_r0[2]; @@ -884,7 +884,7 @@ DECLSPEC void append_block8_optimized (const u32 offset, u32 *buf0, u32 *buf1, c } #endif - #if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV + #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; @@ -894,7 +894,7 @@ DECLSPEC void append_block8_optimized (const u32 offset, u32 *buf0, u32 *buf1, c const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; #endif - #if defined IS_AMD + #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); #endif @@ -1359,11 +1359,7 @@ DECLSPEC u32 rule_op_mangle_delete_at (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED c const u32 ml = (1 << ((p0 & 3) * 8)) - 1; const u32 mr = ~ml; - #ifdef IS_AMD const int p0_switch = p0 / 4; - #else - const int p0_switch = p0 / 4; - #endif switch (p0_switch) { @@ -1466,11 +1462,7 @@ DECLSPEC u32 rule_op_mangle_omit (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const const u32 ml = (1 << ((p0 & 3) * 8)) - 1; const u32 mr = ~ml; - #ifdef IS_AMD const int p0_switch = p0 / 4; - #else - const int p0_switch = p0 / 4; - #endif switch (p0_switch) { @@ -1552,11 +1544,7 @@ DECLSPEC u32 rule_op_mangle_insert (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED cons const u32 mr = 0xffffff00 << ((p0 & 3) * 8); - #ifdef IS_AMD const int p0_switch = p0 / 4; - #else - const int p0_switch = p0 / 4; - #endif switch (p0_switch) { diff --git a/OpenCL/inc_vendor.h b/OpenCL/inc_vendor.h index d98a85053..a94bbefd4 100644 --- a/OpenCL/inc_vendor.h +++ b/OpenCL/inc_vendor.h @@ -96,7 +96,7 @@ #elif VENDOR_ID == (1 << 8) #define IS_AMD_USE_HIP // TODO HIP optimization potential -#define IS_GENERIC +//#define IS_GENERIC #else #define IS_GENERIC #endif diff --git a/OpenCL/m00500-optimized.cl b/OpenCL/m00500-optimized.cl index 6ea000442..19f7153ff 100644 --- a/OpenCL/m00500-optimized.cl +++ b/OpenCL/m00500-optimized.cl @@ -32,7 +32,7 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons u32 tmp3; u32 tmp4; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; u32 in2 = append[2]; @@ -139,7 +139,7 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, u32 tmp3; u32 tmp4; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; u32 in2 = append[2]; @@ -246,7 +246,7 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const u32 tmp1; u32 tmp2; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; diff --git a/OpenCL/m01500_a3-pure.cl b/OpenCL/m01500_a3-pure.cl index f7a8ad45c..c2c4245e1 100644 --- a/OpenCL/m01500_a3-pure.cl +++ b/OpenCL/m01500_a3-pure.cl @@ -19,7 +19,7 @@ #define KXX_DECL #endif -#ifdef IS_AMD +#if (defined IS_AMD || defined IS_HIP) #define KXX_DECL #endif @@ -896,7 +896,7 @@ DECLSPEC void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const #endif #endif -#if defined IS_AMD || defined IS_GENERIC +#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC /* * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC diff --git a/OpenCL/m01600-optimized.cl b/OpenCL/m01600-optimized.cl index 6489a04b8..cfaad44cc 100644 --- a/OpenCL/m01600-optimized.cl +++ b/OpenCL/m01600-optimized.cl @@ -31,7 +31,7 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons u32 tmp3; u32 tmp4; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; u32 in2 = append[2]; @@ -138,7 +138,7 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, u32 tmp3; u32 tmp4; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; u32 in2 = append[2]; @@ -245,7 +245,7 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const u32 tmp1; u32 tmp2; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; diff --git a/OpenCL/m03000_a3-pure.cl b/OpenCL/m03000_a3-pure.cl index a44b6f065..65beaabda 100644 --- a/OpenCL/m03000_a3-pure.cl +++ b/OpenCL/m03000_a3-pure.cl @@ -19,7 +19,7 @@ #define KXX_DECL #endif -#ifdef IS_AMD +#if (defined IS_AMD || defined IS_HIP) #define KXX_DECL #endif @@ -896,7 +896,7 @@ DECLSPEC void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const #endif #endif -#if defined IS_AMD || defined IS_GENERIC +#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC /* * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC diff --git a/OpenCL/m05800-optimized.cl b/OpenCL/m05800-optimized.cl index b247b05e0..38099159f 100644 --- a/OpenCL/m05800-optimized.cl +++ b/OpenCL/m05800-optimized.cl @@ -2119,7 +2119,7 @@ DECLSPEC void append_salt (u32 *w0, u32 *w1, u32 *w2, const u32 *append, const u u32 tmp4; u32 tmp5; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; u32 in2 = append[2]; diff --git a/OpenCL/m06300-optimized.cl b/OpenCL/m06300-optimized.cl index c3d320c95..b7c9ddddd 100644 --- a/OpenCL/m06300-optimized.cl +++ b/OpenCL/m06300-optimized.cl @@ -28,7 +28,7 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons u32 tmp3; u32 tmp4; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; u32 in2 = append[2]; @@ -135,7 +135,7 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, u32 tmp3; u32 tmp4; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; u32 in2 = append[2]; @@ -242,7 +242,7 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const u32 tmp1; u32 tmp2; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; diff --git a/OpenCL/m07400-optimized.cl b/OpenCL/m07400-optimized.cl index df1f3478f..7efa5c94e 100644 --- a/OpenCL/m07400-optimized.cl +++ b/OpenCL/m07400-optimized.cl @@ -45,7 +45,7 @@ DECLSPEC u32 memcat16 (u32 *block, const u32 offset, const u32 *append, const u3 u32 in2 = append[2]; u32 in3 = append[3]; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC const u32 tmp0 = hc_bytealign_be ( 0, in0, offset); const u32 tmp1 = hc_bytealign_be (in0, in1, offset); const u32 tmp2 = hc_bytealign_be (in1, in2, offset); @@ -165,7 +165,7 @@ DECLSPEC u32 memcat16c (u32 *block, const u32 offset, const u32 *append, const u u32 in2 = append[2]; u32 in3 = append[3]; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC const u32 tmp0 = hc_bytealign_be ( 0, in0, offset); const u32 tmp1 = hc_bytealign_be (in0, in1, offset); const u32 tmp2 = hc_bytealign_be (in1, in2, offset); @@ -322,7 +322,7 @@ DECLSPEC u32 memcat16s (u32 *block, const u32 offset, const u32 *append, const u u32 in3 = append[3]; u32 in4 = append[4]; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC const u32 tmp0 = hc_bytealign_be ( 0, in0, offset); const u32 tmp1 = hc_bytealign_be (in0, in1, offset); const u32 tmp2 = hc_bytealign_be (in1, in2, offset); @@ -456,7 +456,7 @@ DECLSPEC u32 memcat16sc (u32 *block, const u32 offset, const u32 *append, const u32 in3 = append[3]; u32 in4 = append[4]; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC const u32 tmp0 = hc_bytealign_be ( 0, in0, offset); const u32 tmp1 = hc_bytealign_be (in0, in1, offset); const u32 tmp2 = hc_bytealign_be (in1, in2, offset); @@ -756,7 +756,7 @@ DECLSPEC u32 memcat20 (u32 *block, const u32 offset, const u32 *append, const u3 u32 in2 = append[2]; u32 in3 = append[3]; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset); const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset); const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset); @@ -915,7 +915,7 @@ DECLSPEC u32 memcat20_x80 (u32 *block, const u32 offset, const u32 *append, cons u32 in3 = append[3]; u32 in4 = 0x80000000; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset); const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset); const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset); @@ -1074,7 +1074,7 @@ DECLSPEC u32 memcat24 (u32 *block, const u32 offset, const u32 *append, const u3 u32 in3 = append[3]; u32 in4 = append[4]; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset); const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset); const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset); diff --git a/OpenCL/m07700_a0-optimized.cl b/OpenCL/m07700_a0-optimized.cl index 81a69e83e..165691e6d 100644 --- a/OpenCL/m07700_a0-optimized.cl +++ b/OpenCL/m07700_a0-optimized.cl @@ -17,13 +17,15 @@ #include "inc_hash_md5.cl" #endif +/* #ifdef IS_AMD #define GETCHAR(a,p) (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff) #define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8)))) #else +*/ + #define GETCHAR(a,p) ((u8 *)(a))[(p)] #define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c) -#endif #define SETSHIFTEDINT(a,n,v) \ { \ diff --git a/OpenCL/m07700_a1-optimized.cl b/OpenCL/m07700_a1-optimized.cl index 9431d66a6..77a3bb26a 100644 --- a/OpenCL/m07700_a1-optimized.cl +++ b/OpenCL/m07700_a1-optimized.cl @@ -15,13 +15,8 @@ #include "inc_hash_md5.cl" #endif -#ifdef IS_AMD -#define GETCHAR(a,p) (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff) -#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8)))) -#else #define GETCHAR(a,p) ((u8 *)(a))[(p)] #define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c) -#endif #define SETSHIFTEDINT(a,n,v) \ { \ diff --git a/OpenCL/m07700_a3-optimized.cl b/OpenCL/m07700_a3-optimized.cl index 53dbb1fe2..e867cb070 100644 --- a/OpenCL/m07700_a3-optimized.cl +++ b/OpenCL/m07700_a3-optimized.cl @@ -15,13 +15,8 @@ #include "inc_hash_md5.cl" #endif -#ifdef IS_AMD -#define GETCHAR(a,p) (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff) -#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8)))) -#else #define GETCHAR(a,p) ((u8 *)(a))[(p)] #define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c) -#endif CONSTANT_VK u32a sapb_trans_tbl[256] = { diff --git a/OpenCL/m07701_a0-optimized.cl b/OpenCL/m07701_a0-optimized.cl index b530785a7..55430df32 100644 --- a/OpenCL/m07701_a0-optimized.cl +++ b/OpenCL/m07701_a0-optimized.cl @@ -17,13 +17,8 @@ #include "inc_hash_md5.cl" #endif -#ifdef IS_AMD -#define GETCHAR(a,p) (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff) -#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8)))) -#else #define GETCHAR(a,p) ((u8 *)(a))[(p)] #define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c) -#endif #define SETSHIFTEDINT(a,n,v) \ { \ diff --git a/OpenCL/m07701_a1-optimized.cl b/OpenCL/m07701_a1-optimized.cl index e1ae00412..425bb3a04 100644 --- a/OpenCL/m07701_a1-optimized.cl +++ b/OpenCL/m07701_a1-optimized.cl @@ -15,13 +15,8 @@ #include "inc_hash_md5.cl" #endif -#ifdef IS_AMD -#define GETCHAR(a,p) (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff) -#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8)))) -#else #define GETCHAR(a,p) ((u8 *)(a))[(p)] #define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c) -#endif #define SETSHIFTEDINT(a,n,v) \ { \ diff --git a/OpenCL/m07701_a3-optimized.cl b/OpenCL/m07701_a3-optimized.cl index ae6762e90..934c943f0 100644 --- a/OpenCL/m07701_a3-optimized.cl +++ b/OpenCL/m07701_a3-optimized.cl @@ -15,13 +15,8 @@ #include "inc_hash_md5.cl" #endif -#ifdef IS_AMD -#define GETCHAR(a,p) (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff) -#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8)))) -#else #define GETCHAR(a,p) ((u8 *)(a))[(p)] #define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c) -#endif CONSTANT_VK u32a sapb_trans_tbl[256] = { diff --git a/OpenCL/m10700-optimized.cl b/OpenCL/m10700-optimized.cl index bf311a22e..a9b50a6ac 100644 --- a/OpenCL/m10700-optimized.cl +++ b/OpenCL/m10700-optimized.cl @@ -232,7 +232,7 @@ DECLSPEC void make_sc (u32 *sc, const u32 *pw, const u32 pw_len, const u32 *bl, u32 i; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC for (i = 0; i < pd; i++) sc[idx++] = pw[i]; sc[idx++] = pw[i] | hc_bytealign_be (bl[0], 0, pm4); @@ -263,7 +263,7 @@ DECLSPEC void make_pt_with_offset (u32 *pt, const u32 offset, const u32 *sc, con const u32 om = m % 4; const u32 od = m / 4; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC pt[0] = hc_bytealign_be (sc[od + 1], sc[od + 0], om); pt[1] = hc_bytealign_be (sc[od + 2], sc[od + 1], om); pt[2] = hc_bytealign_be (sc[od + 3], sc[od + 2], om); diff --git a/OpenCL/m11600-pure.cl b/OpenCL/m11600-pure.cl index 469365ae6..be42e185b 100644 --- a/OpenCL/m11600-pure.cl +++ b/OpenCL/m11600-pure.cl @@ -42,7 +42,7 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co u32 tmp0; u32 tmp1; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC tmp0 = hc_bytealign_be (0, append, func_len); tmp1 = hc_bytealign_be (append, 0, func_len); #endif diff --git a/OpenCL/m12500-pure.cl b/OpenCL/m12500-pure.cl index ce1b2cb53..f8ed47771 100644 --- a/OpenCL/m12500-pure.cl +++ b/OpenCL/m12500-pure.cl @@ -37,7 +37,7 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co u32 tmp0; u32 tmp1; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC tmp0 = hc_bytealign_be (0, append, func_len); tmp1 = hc_bytealign_be (append, 0, func_len); #endif diff --git a/OpenCL/m13800_a0-optimized.cl b/OpenCL/m13800_a0-optimized.cl index a036044b0..6758ffbd4 100644 --- a/OpenCL/m13800_a0-optimized.cl +++ b/OpenCL/m13800_a0-optimized.cl @@ -51,7 +51,7 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry) u32x tmp15; u32x tmp16; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC tmp00 = hc_bytealign_be ( 0, carry[ 0], offset); tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset); tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset); diff --git a/OpenCL/m13800_a1-optimized.cl b/OpenCL/m13800_a1-optimized.cl index 3b462466e..85e711b94 100644 --- a/OpenCL/m13800_a1-optimized.cl +++ b/OpenCL/m13800_a1-optimized.cl @@ -49,7 +49,7 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry) u32x tmp15; u32x tmp16; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC tmp00 = hc_bytealign_be ( 0, carry[ 0], offset); tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset); tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset); diff --git a/OpenCL/m13800_a3-optimized.cl b/OpenCL/m13800_a3-optimized.cl index 9ad06a344..65b759de0 100644 --- a/OpenCL/m13800_a3-optimized.cl +++ b/OpenCL/m13800_a3-optimized.cl @@ -48,7 +48,7 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry) u32x tmp15; u32x tmp16; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC tmp00 = hc_bytealign_be ( 0, carry[ 0], offset); tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset); tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset); diff --git a/OpenCL/m14000_a3-pure.cl b/OpenCL/m14000_a3-pure.cl index f44e0bbfc..84b419923 100644 --- a/OpenCL/m14000_a3-pure.cl +++ b/OpenCL/m14000_a3-pure.cl @@ -19,7 +19,7 @@ #define KXX_DECL #endif -#ifdef IS_AMD +#if (defined IS_AMD || defined IS_HIP) #define KXX_DECL #endif @@ -896,7 +896,7 @@ DECLSPEC void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const #endif #endif -#if defined IS_AMD || defined IS_GENERIC +#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC /* * Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC diff --git a/OpenCL/m23700-pure.cl b/OpenCL/m23700-pure.cl index ec62394a1..af287574e 100644 --- a/OpenCL/m23700-pure.cl +++ b/OpenCL/m23700-pure.cl @@ -145,7 +145,7 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co u32 tmp0; u32 tmp1; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC tmp0 = hc_bytealign_be (0, append, func_len); tmp1 = hc_bytealign_be (append, 0, func_len); #endif diff --git a/OpenCL/m23800-pure.cl b/OpenCL/m23800-pure.cl index 1629433c0..f6d345677 100644 --- a/OpenCL/m23800-pure.cl +++ b/OpenCL/m23800-pure.cl @@ -56,7 +56,7 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co u32 tmp0; u32 tmp1; - #if defined IS_AMD || defined IS_GENERIC + #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC tmp0 = hc_bytealign_be (0, append, func_len); tmp1 = hc_bytealign_be (append, 0, func_len); #endif diff --git a/src/backend.c b/src/backend.c index d63ee3a1d..53de2d525 100644 --- a/src/backend.c +++ b/src/backend.c @@ -8339,17 +8339,17 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) device_param->has_mov64 = false; device_param->has_prmt = false; - device_param->has_vadd = false; - device_param->has_vaddc = false; - device_param->has_vadd_co = false; - device_param->has_vaddc_co = false; - device_param->has_vsub = false; - device_param->has_vsubb = false; - device_param->has_vsub_co = false; - device_param->has_vsubb_co = false; - device_param->has_vadd3 = false; - device_param->has_vbfe = false; - device_param->has_vperm = false; + device_param->has_vadd = true; + device_param->has_vaddc = true; + device_param->has_vadd_co = true; + device_param->has_vaddc_co = true; + device_param->has_vsub = true; + device_param->has_vsubb = true; + device_param->has_vsub_co = true; + device_param->has_vsubb_co = true; + device_param->has_vadd3 = true; + device_param->has_vbfe = true; + device_param->has_vperm = true; // device_available_mem