From 4359418918f850b0eaa65d0177b2f69cec366161 Mon Sep 17 00:00:00 2001 From: jsteube Date: Fri, 1 Mar 2019 15:34:49 +0100 Subject: [PATCH] OpenCL Runtime: Not using amd_bytealign (amd_bitalign is fine) on AMDGPU driver drastically reduces JiT segfaults --- OpenCL/inc_types.cl | 44 ++++++++++++++++++++++++++++++++++++++++---- OpenCL/inc_vendor.cl | 4 +++- docs/changes.txt | 1 + 3 files changed, 44 insertions(+), 5 deletions(-) diff --git a/OpenCL/inc_types.cl b/OpenCL/inc_types.cl index 32ca1d109..dda8b416b 100644 --- a/OpenCL/inc_types.cl +++ b/OpenCL/inc_types.cl @@ -353,22 +353,58 @@ DECLSPEC u64x rotl64 (const u64x a, const u32 n) DECLSPEC u32x hc_bfe (const u32x a, const u32x b, const u32x c) { - return amd_bfe (a, b, c); + #define BIT(x) ((u32x) (1u) << (x)) + #define BIT_MASK(x) (BIT (x) - 1) + #define BFE(x,y,z) (((x) >> (y)) & BIT_MASK (z)) + + return BFE (a, b, c); + + #undef BIT + #undef BIT_MASK + #undef BFE } DECLSPEC u32 hc_bfe_S (const u32 a, const u32 b, const u32 c) { - return amd_bfe (a, b, c); + #define BIT(x) (1u << (x)) + #define BIT_MASK(x) (BIT (x) - 1) + #define BFE(x,y,z) (((x) >> (y)) & BIT_MASK (z)) + + return BFE (a, b, c); + + #undef BIT + #undef BIT_MASK + #undef BFE } DECLSPEC u32x hc_bytealign_be (const u32x a, const u32x b, const u32 c) { - return amd_bytealign (a, b, c); + u32x r; + + switch (c & 3) + { + case 0: r = b; break; + case 1: r = (a << 24) | (b >> 8); break; + case 2: r = (a << 16) | (b >> 16); break; + case 3: r = (a << 8) | (b >> 24); break; + } + + return r; } DECLSPEC u32 hc_bytealign_be_S (const u32 a, const u32 b, const u32 c) { - return amd_bytealign (a, b, c); + u32 r; + + switch (c & 3) + { + case 0: r = b; break; + case 1: r = (a << 24) | (b >> 8); break; + case 2: r = (a << 16) | (b >> 16); break; + case 3: r = (a << 8) | (b >> 24); break; + } + + return r; } DECLSPEC u32x hc_bytealign (const u32x a, const u32x b, const u32 c) diff --git a/OpenCL/inc_vendor.cl b/OpenCL/inc_vendor.cl index 18fce9a16..952a3b849 100644 --- a/OpenCL/inc_vendor.cl +++ b/OpenCL/inc_vendor.cl @@ -78,7 +78,9 @@ // HAS_VPERM indicated ROCM #if (defined IS_AMD && HAS_VPERM == 0) -#define MAYBE_VOLATILE volatile +//#define MAYBE_VOLATILE volatile +//testrun for hashcat 6.0.0 +#define MAYBE_VOLATILE #else #define MAYBE_VOLATILE #endif diff --git a/docs/changes.txt b/docs/changes.txt index c4083b1e1..dca89ed64 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -36,6 +36,7 @@ - Bitcoin Wallet: Be more user friendly by allowing a larger data range for ckey and public_key - OpenCL Runtime: Workaround JiT compiler error on AMDGPU driver compiling WPA-EAPOL-PBKDF2 OpenCL kernel - OpenCL Runtime: Improve ROCM detection and make sure to not confuse with recent AMDGPU drivers +- OpenCL Runtime: Not using amd_bytealign (amd_bitalign is fine) on AMDGPU driver drastically reduces JiT segfaults ## ## Technical