Use inline static on HIP for some hash-modes which benefit from it

pull/3508/head
Jens Steube 2 years ago
parent 29a3ea2583
commit f6537a2964

@ -142,17 +142,20 @@ using namespace metal;
* fast but pure kernels on rocm is a good example
*/
#ifdef NO_INLINE
#define HC_INLINE
#else
#define HC_INLINE inline static
#endif
#if defined IS_AMD && defined IS_GPU
#define DECLSPEC inline static
#define DECLSPEC HC_INLINE
#elif defined IS_HIP
#define DECLSPEC __device__
#define DECLSPEC __device__ HC_INLINE
#else
#define DECLSPEC
#endif
#define HC_INLINE0 __attribute__ ((noinline))
#define HC_INLINE1 __attribute__ ((inline))
/**
* AMD specific
*/

@ -17,14 +17,6 @@
#define COMPARE_S M2S(INCLUDE_PATH/inc_comp_single.cl)
#define COMPARE_M M2S(INCLUDE_PATH/inc_comp_multi.cl)
#if defined IS_AMD && defined IS_GPU
#define HC_INLINE
#elif defined IS_HIP
#define HC_INLINE HC_INLINE0
#else
#define HC_INLINE
#endif
typedef struct pdf
{
int V;
@ -323,7 +315,7 @@ DECLSPEC void make_w_with_offset (PRIVATE_AS ctx_t *ctx, const u32 W_len, const
}
}
DECLSPEC HC_INLINE u32 do_round (LOCAL_AS u32 *sc, PRIVATE_AS const u32 *pw, const u32 pw_len, PRIVATE_AS ctx_t *ctx, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)
DECLSPEC u32 do_round (LOCAL_AS u32 *sc, PRIVATE_AS const u32 *pw, const u32 pw_len, PRIVATE_AS ctx_t *ctx, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)
{
// make scratch buffer

@ -149,6 +149,16 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
}
}
if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
{
if ((hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) == 1)
{
// this is a workaround to avoid a compile time of over an hour (and then to not work) on ROCM in pure kernel mode
hc_asprintf (&jit_build_options, "-D NO_INLINE");
}
}
return jit_build_options;
}

@ -49,6 +49,20 @@ u32 module_salt_type (MAYBE_UNUSED const hashconfig_t *hashconfig,
const char *module_st_hash (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH; }
const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS; }
char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
{
char *jit_build_options = NULL;
if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
{
// this is a workaround to avoid a compile time of over an hour (and then to not work) on ROCM in pure kernel mode
hc_asprintf (&jit_build_options, "-D NO_INLINE");
}
return jit_build_options;
}
int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
{
u32 *digest = (u32 *) digest_buf;
@ -184,7 +198,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_hook23 = MODULE_DEFAULT;
module_ctx->module_hook_salt_size = MODULE_DEFAULT;
module_ctx->module_hook_size = MODULE_DEFAULT;
module_ctx->module_jit_build_options = MODULE_DEFAULT;
module_ctx->module_jit_build_options = module_jit_build_options;
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;

@ -52,6 +52,20 @@ typedef struct qnx_md5_tmp
static const int ROUNDS_QNX = 1000;
char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
{
char *jit_build_options = NULL;
if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
{
// this is a workaround to avoid a compile time of over an hour (and then to not work) on ROCM in pure kernel mode
hc_asprintf (&jit_build_options, "-D NO_INLINE");
}
return jit_build_options;
}
u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u64 tmp_size = (const u64) sizeof (qnx_md5_tmp_t);
@ -226,7 +240,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_hook23 = MODULE_DEFAULT;
module_ctx->module_hook_salt_size = MODULE_DEFAULT;
module_ctx->module_hook_size = MODULE_DEFAULT;
module_ctx->module_jit_build_options = MODULE_DEFAULT;
module_ctx->module_jit_build_options = module_jit_build_options;
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;

@ -54,6 +54,20 @@ typedef struct sha1_double_salt
} sha1_double_salt_t;
char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
{
char *jit_build_options = NULL;
if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
{
// this is a workaround to avoid a compile time of over an hour (and then to not work) on ROCM in pure kernel mode
hc_asprintf (&jit_build_options, "-D NO_INLINE");
}
return jit_build_options;
}
u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u64 esalt_size = (const u64) sizeof (sha1_double_salt_t);
@ -242,7 +256,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_hook23 = MODULE_DEFAULT;
module_ctx->module_hook_salt_size = MODULE_DEFAULT;
module_ctx->module_hook_size = MODULE_DEFAULT;
module_ctx->module_jit_build_options = MODULE_DEFAULT;
module_ctx->module_jit_build_options = module_jit_build_options;
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;

@ -53,6 +53,20 @@ typedef struct devise_hash
} devise_hash_t;
char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
{
char *jit_build_options = NULL;
if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
{
// this is a workaround to avoid a compile time of over an hour (and then to not work) on ROCM in pure kernel mode
hc_asprintf (&jit_build_options, "-D NO_INLINE");
}
return jit_build_options;
}
u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u64 esalt_size = (const u64) sizeof (devise_hash_t);
@ -241,7 +255,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_hook23 = MODULE_DEFAULT;
module_ctx->module_hook_salt_size = MODULE_DEFAULT;
module_ctx->module_hook_size = MODULE_DEFAULT;
module_ctx->module_jit_build_options = MODULE_DEFAULT;
module_ctx->module_jit_build_options = module_jit_build_options;
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;

Loading…
Cancel
Save