mirror of
https://github.com/hashcat/hashcat.git
synced 2025-01-13 17:21:10 +00:00
Use __launch_bounds__ in CUDA as replacement for reqd_work_group_size() in OpenCL
This commit is contained in:
parent
dbbdb7e5ac
commit
5e0eb288c9
@ -8,6 +8,7 @@
|
||||
#include "inc_platform.h"
|
||||
|
||||
#ifdef IS_NATIVE
|
||||
#define FIXED_THREAD_COUNT(n)
|
||||
#define SYNC_THREADS()
|
||||
#endif
|
||||
|
||||
@ -107,9 +108,11 @@ DECLSPEC u64 rotr64_S (const u64 a, const int n)
|
||||
return ((a >> n) | ((a << (64 - n))));
|
||||
}
|
||||
|
||||
#define FIXED_THREAD_COUNT(n) __launch_bounds__((n), 0)
|
||||
#define SYNC_THREADS() __syncthreads ()
|
||||
#endif
|
||||
|
||||
#ifdef IS_OPENCL
|
||||
#define FIXED_THREAD_COUNT(n) __attribute__((reqd_work_group_size((n), 1, 1)))
|
||||
#define SYNC_THREADS() barrier (CLK_LOCAL_MEM_FENCE)
|
||||
#endif
|
||||
|
@ -376,7 +376,7 @@ DECLSPEC void expand_key (u32 *E, u32 *W, const int len)
|
||||
}
|
||||
}
|
||||
|
||||
KERNEL_FQ void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m03200_init (KERN_ATTR_TMPS (bcrypt_tmp_t))
|
||||
KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m03200_init (KERN_ATTR_TMPS (bcrypt_tmp_t))
|
||||
{
|
||||
/**
|
||||
* base
|
||||
@ -587,7 +587,7 @@ KERNEL_FQ void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m03
|
||||
}
|
||||
}
|
||||
|
||||
KERNEL_FQ void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m03200_loop (KERN_ATTR_TMPS (bcrypt_tmp_t))
|
||||
KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m03200_loop (KERN_ATTR_TMPS (bcrypt_tmp_t))
|
||||
{
|
||||
/**
|
||||
* base
|
||||
@ -779,7 +779,7 @@ KERNEL_FQ void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m03
|
||||
}
|
||||
}
|
||||
|
||||
KERNEL_FQ void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m03200_comp (KERN_ATTR_TMPS (bcrypt_tmp_t))
|
||||
KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m03200_comp (KERN_ATTR_TMPS (bcrypt_tmp_t))
|
||||
{
|
||||
/**
|
||||
* base
|
||||
|
@ -357,7 +357,7 @@ CONSTANT_VK u32a c_pbox[18] =
|
||||
L ^= P[17]; \
|
||||
}
|
||||
|
||||
KERNEL_FQ void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m09000_init (KERN_ATTR_TMPS (pwsafe2_tmp_t))
|
||||
KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m09000_init (KERN_ATTR_TMPS (pwsafe2_tmp_t))
|
||||
{
|
||||
/**
|
||||
* base
|
||||
@ -576,7 +576,7 @@ KERNEL_FQ void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m09
|
||||
}
|
||||
}
|
||||
|
||||
KERNEL_FQ void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m09000_loop (KERN_ATTR_TMPS (pwsafe2_tmp_t))
|
||||
KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m09000_loop (KERN_ATTR_TMPS (pwsafe2_tmp_t))
|
||||
{
|
||||
/**
|
||||
* base
|
||||
|
@ -586,7 +586,7 @@ KERNEL_FQ void m18600_loop (KERN_ATTR_TMPS_ESALT (odf11_tmp_t, odf11_t))
|
||||
}
|
||||
}
|
||||
|
||||
KERNEL_FQ void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m18600_comp (KERN_ATTR_TMPS_ESALT (odf11_tmp_t, odf11_t))
|
||||
KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m18600_comp (KERN_ATTR_TMPS_ESALT (odf11_tmp_t, odf11_t))
|
||||
{
|
||||
const u64 gid = get_global_id (0);
|
||||
const u64 lid = get_local_id (0);
|
||||
|
@ -68,7 +68,7 @@ int hc_cuMemcpyHtoD (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dstDev
|
||||
int hc_cuMemFree (hashcat_ctx_t *hashcat_ctx, CUdeviceptr dptr);
|
||||
int hc_cuModuleGetFunction (hashcat_ctx_t *hashcat_ctx, CUfunction *hfunc, CUmodule hmod, const char *name);
|
||||
int hc_cuModuleLoadDataEx (hashcat_ctx_t *hashcat_ctx, CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
|
||||
int hc_cuModuleLoadDataExLog (hashcat_ctx_t *hashcat_ctx, CUmodule *module, const void *image, const u64 threads_per_block);
|
||||
int hc_cuModuleLoadDataExLog (hashcat_ctx_t *hashcat_ctx, CUmodule *module, const void *image);
|
||||
int hc_cuModuleUnload (hashcat_ctx_t *hashcat_ctx, CUmodule hmod);
|
||||
int hc_cuStreamCreate (hashcat_ctx_t *hashcat_ctx, CUstream *phStream, unsigned int Flags);
|
||||
int hc_cuStreamDestroy (hashcat_ctx_t *hashcat_ctx, CUstream hStream);
|
||||
|
@ -1297,15 +1297,15 @@ int hc_cuModuleLoadDataEx (hashcat_ctx_t *hashcat_ctx, CUmodule *module, const v
|
||||
return 0;
|
||||
}
|
||||
|
||||
int hc_cuModuleLoadDataExLog (hashcat_ctx_t *hashcat_ctx, CUmodule *module, const void *image, const u64 threads_per_block)
|
||||
int hc_cuModuleLoadDataExLog (hashcat_ctx_t *hashcat_ctx, CUmodule *module, const void *image)
|
||||
{
|
||||
#define LOG_SIZE 8192
|
||||
|
||||
char *info_log = hcmalloc (LOG_SIZE);
|
||||
char *error_log = hcmalloc (LOG_SIZE);
|
||||
|
||||
CUjit_option opts[7];
|
||||
void *vals[7];
|
||||
CUjit_option opts[6];
|
||||
void *vals[6];
|
||||
|
||||
opts[0] = CU_JIT_TARGET_FROM_CUCONTEXT;
|
||||
vals[0] = (void *) 0;
|
||||
@ -1325,17 +1325,7 @@ int hc_cuModuleLoadDataExLog (hashcat_ctx_t *hashcat_ctx, CUmodule *module, cons
|
||||
opts[5] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES;
|
||||
vals[5] = (void *) LOG_SIZE;
|
||||
|
||||
int opts_cnt = 6;
|
||||
|
||||
if ((threads_per_block > 0) && (threads_per_block < 1024))
|
||||
{
|
||||
opts[6] = CU_JIT_THREADS_PER_BLOCK;
|
||||
vals[6] = (void *) threads_per_block;
|
||||
|
||||
opts_cnt++;
|
||||
}
|
||||
|
||||
const int rc_cuModuleLoadDataEx = hc_cuModuleLoadDataEx (hashcat_ctx, module, image, opts_cnt, opts, vals);
|
||||
const int rc_cuModuleLoadDataEx = hc_cuModuleLoadDataEx (hashcat_ctx, module, image, 6, opts, vals);
|
||||
|
||||
#if defined (DEBUG)
|
||||
printf ("cuModuleLoadDataEx() Info Log (%d):\n%s\n\n", (int) strlen (info_log), info_log);
|
||||
@ -7778,7 +7768,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
||||
|
||||
if (rc_nvrtcDestroyProgram == -1) return -1;
|
||||
|
||||
const int rc_cuModuleLoadDataEx = hc_cuModuleLoadDataExLog (hashcat_ctx, &device_param->cuda_module, binary, device_param->kernel_threads_max);
|
||||
const int rc_cuModuleLoadDataEx = hc_cuModuleLoadDataExLog (hashcat_ctx, &device_param->cuda_module, binary);
|
||||
|
||||
if (rc_cuModuleLoadDataEx == -1) return -1;
|
||||
|
||||
@ -7864,7 +7854,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
||||
|
||||
if (device_param->is_cuda == true)
|
||||
{
|
||||
const int rc_cuModuleLoadDataEx = hc_cuModuleLoadDataExLog (hashcat_ctx, &device_param->cuda_module, kernel_sources[0], device_param->kernel_threads_max);
|
||||
const int rc_cuModuleLoadDataEx = hc_cuModuleLoadDataExLog (hashcat_ctx, &device_param->cuda_module, kernel_sources[0]);
|
||||
|
||||
if (rc_cuModuleLoadDataEx == -1) return -1;
|
||||
}
|
||||
@ -8032,7 +8022,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
||||
|
||||
// tbd: check for some useful options
|
||||
|
||||
const int rc_cuModuleLoadDataEx = hc_cuModuleLoadDataExLog (hashcat_ctx, &device_param->cuda_module_mp, binary, 0);
|
||||
const int rc_cuModuleLoadDataEx = hc_cuModuleLoadDataExLog (hashcat_ctx, &device_param->cuda_module_mp, binary);
|
||||
|
||||
if (rc_cuModuleLoadDataEx == -1) return -1;
|
||||
|
||||
@ -8116,7 +8106,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
||||
|
||||
if (device_param->is_cuda == true)
|
||||
{
|
||||
const int rc_cuModuleLoadDataEx = hc_cuModuleLoadDataExLog (hashcat_ctx, &device_param->cuda_module_mp, kernel_sources[0], 0);
|
||||
const int rc_cuModuleLoadDataEx = hc_cuModuleLoadDataExLog (hashcat_ctx, &device_param->cuda_module_mp, kernel_sources[0]);
|
||||
|
||||
if (rc_cuModuleLoadDataEx == -1) return -1;
|
||||
}
|
||||
@ -8287,7 +8277,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
||||
|
||||
// tbd: check for some useful options
|
||||
|
||||
const int rc_cuModuleLoadDataEx = hc_cuModuleLoadDataExLog (hashcat_ctx, &device_param->cuda_module_amp, binary, 0);
|
||||
const int rc_cuModuleLoadDataEx = hc_cuModuleLoadDataExLog (hashcat_ctx, &device_param->cuda_module_amp, binary);
|
||||
|
||||
if (rc_cuModuleLoadDataEx == -1) return -1;
|
||||
|
||||
@ -8371,7 +8361,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
||||
|
||||
if (device_param->is_cuda == true)
|
||||
{
|
||||
const int rc_cuModuleLoadDataEx = hc_cuModuleLoadDataExLog (hashcat_ctx, &device_param->cuda_module_amp, kernel_sources[0], 0);
|
||||
const int rc_cuModuleLoadDataEx = hc_cuModuleLoadDataExLog (hashcat_ctx, &device_param->cuda_module_amp, kernel_sources[0]);
|
||||
|
||||
if (rc_cuModuleLoadDataEx == -1) return -1;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user