From aee8e559c476115fd11a9c36a0bcd22781da39c1 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Sun, 31 Oct 2021 10:05:58 +0100 Subject: [PATCH] PDF Kernel (10700): Improved performance on AMD GPU by using shared memory for the scratch buffer and disable inlining to save spilling Inspired by https://github.com/reger-men/hashcat/blob/6.2.4/OpenCL/m10700-optimized.cl --- OpenCL/inc_vendor.h | 11 +++++++++++ OpenCL/m10700-optimized.cl | 2 +- docs/changes.txt | 2 +- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/OpenCL/inc_vendor.h b/OpenCL/inc_vendor.h index 43f5d8fb8..7ab1b46b0 100644 --- a/OpenCL/inc_vendor.h +++ b/OpenCL/inc_vendor.h @@ -130,6 +130,17 @@ #define DECLSPEC #endif +#define INLINE0 __attribute__ ((noinline)) +#define INLINE1 __attribute__ ((inline)) + +#if defined IS_AMD && defined IS_GPU +#define INLINE INLINE0 +#elif defined IS_HIP +#define INLINE INLINE0 +#else +#define INLINE +#endif + /** * AMD specific */ diff --git a/OpenCL/m10700-optimized.cl b/OpenCL/m10700-optimized.cl index 8450f0480..6ff23bd73 100644 --- a/OpenCL/m10700-optimized.cl +++ b/OpenCL/m10700-optimized.cl @@ -315,7 +315,7 @@ DECLSPEC void make_w_with_offset (ctx_t *ctx, const u32 W_len, const u32 offset, } } -DECLSPEC u32 do_round (LOCAL_AS u32 *sc, const u32 *pw, const u32 pw_len, ctx_t *ctx, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) +DECLSPEC INLINE u32 do_round (LOCAL_AS u32 *sc, const u32 *pw, const u32 pw_len, ctx_t *ctx, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) { // make scratch buffer diff --git a/docs/changes.txt b/docs/changes.txt index ef648bef2..efbc35dbc 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -13,7 +13,7 @@ ## Performance ## -- PDF Kernel (10700): Improved performance on AMD GPU by using shared memory for the scratch buffer +- PDF Kernel (10700): Improved performance on AMD GPU by using shared memory for the 
scratch buffer and disabling inlining to save spilling ## ## Bugs