PDF Kernel (10700): Improved performance on AMD GPU by using shared memory for the scratch buffer and disabling inlining to reduce register spilling

Inspired by https://github.com/reger-men/hashcat/blob/6.2.4/OpenCL/m10700-optimized.cl
pull/3017/head
Jens Steube 3 years ago
parent 1d33b57144
commit aee8e559c4

@ -130,6 +130,17 @@
#define DECLSPEC
#endif
/**
 * Inlining control
 *
 * INLINE0 expands to __attribute__ ((noinline)) and INLINE1 expands to
 * __attribute__ ((inline)).
 * NOTE(review): GCC/Clang document `noinline` and `always_inline` as function
 * attributes; a bare `inline` attribute is not in their documented lists -
 * confirm the target compilers accept INLINE1 before relying on it.
 */
#define INLINE0 __attribute__ ((noinline))
#define INLINE1 __attribute__ ((inline))
/**
 * INLINE is the per-platform default placed on function declarations:
 * it resolves to INLINE0 (noinline) when both IS_AMD and IS_GPU are defined,
 * or when IS_HIP is defined; on every other platform it expands to nothing,
 * so no inlining attribute is added.
 */
#if defined IS_AMD && defined IS_GPU
#define INLINE INLINE0
#elif defined IS_HIP
#define INLINE INLINE0
#else
#define INLINE
#endif
/**
* AMD specific
*/

@ -315,7 +315,7 @@ DECLSPEC void make_w_with_offset (ctx_t *ctx, const u32 W_len, const u32 offset,
}
}
DECLSPEC u32 do_round (LOCAL_AS u32 *sc, const u32 *pw, const u32 pw_len, ctx_t *ctx, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)
DECLSPEC INLINE u32 do_round (LOCAL_AS u32 *sc, const u32 *pw, const u32 pw_len, ctx_t *ctx, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)
{
// make scratch buffer

@ -13,7 +13,7 @@
## Performance
##
- PDF Kernel (10700): Improved performance on AMD GPU by using shared memory for the scratch buffer
- PDF Kernel (10700): Improved performance on AMD GPU by using shared memory for the scratch buffer and disabling inlining to reduce register spilling
##
## Bugs

Loading…
Cancel
Save