diff --git a/OpenCL/inc_vendor.h b/OpenCL/inc_vendor.h
index 43f5d8fb8..7ab1b46b0 100644
--- a/OpenCL/inc_vendor.h
+++ b/OpenCL/inc_vendor.h
@@ -130,6 +130,25 @@
 #define DECLSPEC
 #endif
 
+/**
+ * Inlining control
+ *
+ * INLINE0 forbids inlining of the annotated function, INLINE1 forces it.
+ * INLINE is the per-platform default: noinline on AMD GPU and on HIP,
+ * empty (compiler decides) everywhere else.
+ */
+
+#define INLINE0 __attribute__ ((noinline))
+#define INLINE1 __attribute__ ((always_inline))
+
+#if defined IS_AMD && defined IS_GPU
+#define INLINE INLINE0
+#elif defined IS_HIP
+#define INLINE INLINE0
+#else
+#define INLINE
+#endif
+
 /**
  * AMD specific
  */
diff --git a/OpenCL/m10700-optimized.cl b/OpenCL/m10700-optimized.cl
index 8450f0480..6ff23bd73 100644
--- a/OpenCL/m10700-optimized.cl
+++ b/OpenCL/m10700-optimized.cl
@@ -315,7 +315,7 @@ DECLSPEC void make_w_with_offset (ctx_t *ctx, const u32 W_len, const u32 offset,
   }
 }
 
-DECLSPEC u32 do_round (LOCAL_AS u32 *sc, const u32 *pw, const u32 pw_len, ctx_t *ctx, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)
+DECLSPEC INLINE u32 do_round (LOCAL_AS u32 *sc, const u32 *pw, const u32 pw_len, ctx_t *ctx, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)
 {
   // make scratch buffer
 
diff --git a/docs/changes.txt b/docs/changes.txt
index ef648bef2..efbc35dbc 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -13,7 +13,7 @@
 ## Performance
 ##
 
-- PDF Kernel (10700): Improved performance on AMD GPU by using shared memory for the scratch buffer
+- PDF Kernel (10700): Improved performance on AMD GPU by using shared memory for the scratch buffer and disabling inlining to reduce spilling
 
 ##
 ## Bugs