mirror of
https://github.com/hashcat/hashcat.git
synced 2024-11-25 09:28:20 +00:00
PDF Kernel (10700): Improved performance on AMD GPU by using shared memory for the scratch buffer and disabling inlining to reduce spilling
Inspired by https://github.com/reger-men/hashcat/blob/6.2.4/OpenCL/m10700-optimized.cl
This commit is contained in:
parent
1d33b57144
commit
aee8e559c4
@ -130,6 +130,17 @@
|
|||||||
#define DECLSPEC
|
#define DECLSPEC
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#define INLINE0 __attribute__ ((noinline))
|
||||||
|
#define INLINE1 __attribute__ ((inline))
|
||||||
|
|
||||||
|
#if defined IS_AMD && defined IS_GPU
|
||||||
|
#define INLINE INLINE0
|
||||||
|
#elif defined IS_HIP
|
||||||
|
#define INLINE INLINE0
|
||||||
|
#else
|
||||||
|
#define INLINE
|
||||||
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* AMD specific
|
* AMD specific
|
||||||
*/
|
*/
|
||||||
|
@ -315,7 +315,7 @@ DECLSPEC void make_w_with_offset (ctx_t *ctx, const u32 W_len, const u32 offset,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
DECLSPEC u32 do_round (LOCAL_AS u32 *sc, const u32 *pw, const u32 pw_len, ctx_t *ctx, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)
|
DECLSPEC INLINE u32 do_round (LOCAL_AS u32 *sc, const u32 *pw, const u32 pw_len, ctx_t *ctx, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)
|
||||||
{
|
{
|
||||||
// make scratch buffer
|
// make scratch buffer
|
||||||
|
|
||||||
|
@ -13,7 +13,7 @@
|
|||||||
## Performance
|
## Performance
|
||||||
##
|
##
|
||||||
|
|
||||||
- PDF Kernel (10700): Improved performance on AMD GPU by using shared memory for the scratch buffer
|
- PDF Kernel (10700): Improved performance on AMD GPU by using shared memory for the scratch buffer and disabling inlining to reduce spilling
|
||||||
|
|
||||||
##
|
##
|
||||||
## Bugs
|
## Bugs
|
||||||
|
Loading…
Reference in New Issue
Block a user