1
0
mirror of https://github.com/hashcat/hashcat.git synced 2024-11-25 09:28:20 +00:00

PDF Kernel (10700): Improved performance on AMD GPU by using shared memory for the scratch buffer and by disabling inlining to reduce spilling

Inspired by https://github.com/reger-men/hashcat/blob/6.2.4/OpenCL/m10700-optimized.cl
This commit is contained in:
Jens Steube 2021-10-31 10:05:58 +01:00
parent 1d33b57144
commit aee8e559c4
3 changed files with 13 additions and 2 deletions

View File

@ -130,6 +130,17 @@
#define DECLSPEC #define DECLSPEC
#endif #endif
/**
 * Inlining control
 *
 * INLINE0 expands to the noinline function attribute, INLINE1 to its
 * inlining counterpart. INLINE is the qualifier placed on function
 * definitions: it resolves to INLINE0 when both IS_AMD and IS_GPU are
 * defined, or when IS_HIP is defined, and to nothing in every other case.
 *
 * NOTE(review): `inline` does not appear among the documented GCC/Clang
 * function attributes (`always_inline` does) - confirm that the target
 * compilers honour INLINE1 before relying on it.
 */

#define INLINE0 __attribute__ ((noinline))
#define INLINE1 __attribute__ ((inline))

#if (defined (IS_AMD) && defined (IS_GPU)) || defined (IS_HIP)
#define INLINE INLINE0
#else
#define INLINE
#endif
/** /**
* AMD specific * AMD specific
*/ */

View File

@ -315,7 +315,7 @@ DECLSPEC void make_w_with_offset (ctx_t *ctx, const u32 W_len, const u32 offset,
} }
} }
DECLSPEC u32 do_round (LOCAL_AS u32 *sc, const u32 *pw, const u32 pw_len, ctx_t *ctx, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) DECLSPEC INLINE u32 do_round (LOCAL_AS u32 *sc, const u32 *pw, const u32 pw_len, ctx_t *ctx, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)
{ {
// make scratch buffer // make scratch buffer

View File

@ -13,7 +13,7 @@
## Performance ## Performance
## ##
- PDF Kernel (10700): Improved performance on AMD GPU by using shared memory for the scratch buffer - PDF Kernel (10700): Improved performance on AMD GPU by using shared memory for the scratch buffer and by disabling inlining to reduce spilling
## ##
## Bugs ## Bugs