Apply previous blowfish optimization for -m 3200 also on -m 9000 and -m 18600

pull/1971/head
jsteube 5 years ago
parent a172ab7d8a
commit 5ef67a8ab7

@ -39,6 +39,60 @@ typedef VTYPE(uint, VECT_SIZE) u32x;
typedef VTYPE(ulong, VECT_SIZE) u64x;
#endif
// unions
typedef union vconv32
{
u64 v32;
struct
{
u16 v16a;
u16 v16b;
};
struct
{
u8 v8a;
u8 v8b;
u8 v8c;
u8 v8d;
};
} vconv32_t;
typedef union vconv64
{
u64 v64;
struct
{
u32 v32a;
u32 v32b;
};
struct
{
u16 v16a;
u16 v16b;
u16 v16c;
u16 v16d;
};
struct
{
u8 v8a;
u8 v8b;
u8 v8c;
u8 v8d;
u8 v8e;
u8 v8f;
u8 v8g;
u8 v8h;
};
} vconv64_t;
DECLSPEC u32 l32_from_64_S (u64 a);
DECLSPEC u32 l32_from_64_S (u64 a)
{
@ -1426,3 +1480,189 @@ typedef struct keyboard_layout_mapping
int dst_len;
} keyboard_layout_mapping_t;
// functions
DECLSPEC u8 v8a_from_v32_S (const u32 v32);
DECLSPEC u8 v8a_from_v32_S (const u32 v32)
{
vconv32_t v;
v.v32 = v32;
return v.v8a;
}
DECLSPEC u8 v8b_from_v32_S (const u32 v32);
DECLSPEC u8 v8b_from_v32_S (const u32 v32)
{
vconv32_t v;
v.v32 = v32;
return v.v8b;
}
DECLSPEC u8 v8c_from_v32_S (const u32 v32);
DECLSPEC u8 v8c_from_v32_S (const u32 v32)
{
vconv32_t v;
v.v32 = v32;
return v.v8c;
}
DECLSPEC u8 v8d_from_v32_S (const u32 v32);
DECLSPEC u8 v8d_from_v32_S (const u32 v32)
{
vconv32_t v;
v.v32 = v32;
return v.v8d;
}
DECLSPEC u16 v16a_from_v32_S (const u32 v32);
DECLSPEC u16 v16a_from_v32_S (const u32 v32)
{
vconv32_t v;
v.v32 = v32;
return v.v16a;
}
DECLSPEC u16 v16b_from_v32_S (const u32 v32);
DECLSPEC u16 v16b_from_v32_S (const u32 v32)
{
vconv32_t v;
v.v32 = v32;
return v.v16b;
}
DECLSPEC u32 v32_from_v16ab_S (const u16 v16a, const u16 v16b);
DECLSPEC u32 v32_from_v16ab_S (const u16 v16a, const u16 v16b)
{
vconv32_t v;
v.v16a = v16a;
v.v16b = v16b;
return v.v32;
}
DECLSPEC u32 v32a_from_v64_S (const u64 v64);
DECLSPEC u32 v32a_from_v64_S (const u64 v64)
{
vconv64_t v;
v.v64 = v64;
return v.v32a;
}
DECLSPEC u32 v32b_from_v64_S (const u64 v64);
DECLSPEC u32 v32b_from_v64_S (const u64 v64)
{
vconv64_t v;
v.v64 = v64;
return v.v32b;
}
DECLSPEC u64 v64_from_v32ab_S (const u32 v32a, const u32 v32b);
DECLSPEC u64 v64_from_v32ab_S (const u32 v32a, const u32 v32b)
{
vconv64_t v;
v.v32a = v32a;
v.v32b = v32b;
return v.v64;
}
// unpack function are similar, but always return u32
DECLSPEC u32 unpack_v8a_from_v32_S (const u32 v32);
DECLSPEC u32 unpack_v8a_from_v32_S (const u32 v32)
{
u32 r;
#if defined IS_NV
asm ("bfe.u32 %0, %1, 0, 8;" : "=r"(r) : "r"(v32));
#elif defined IS_AMD
#ifdef HAS_VBFE
__asm__ ("V_BFE_U32 %0, %1, 0, 8;" : "=v"(r) : "v"(v32));
#else
r = (v32 >> 0) & 0xff;
#endif
#else
r = (v32 >> 0) & 0xff;
#endif
return r;
}
DECLSPEC u32 unpack_v8b_from_v32_S (const u32 v32);
DECLSPEC u32 unpack_v8b_from_v32_S (const u32 v32)
{
u32 r;
#if defined IS_NV
asm ("bfe.u32 %0, %1, 8, 8;" : "=r"(r) : "r"(v32));
#elif defined IS_AMD
#ifdef HAS_VBFE
__asm__ ("V_BFE_U32 %0, %1, 8, 8;" : "=v"(r) : "v"(v32));
#else
r = (v32 >> 8) & 0xff;
#endif
#else
r = (v32 >> 8) & 0xff;
#endif
return r;
}
DECLSPEC u32 unpack_v8c_from_v32_S (const u32 v32);
DECLSPEC u32 unpack_v8c_from_v32_S (const u32 v32)
{
u32 r;
#if defined IS_NV
asm ("bfe.u32 %0, %1, 16, 8;" : "=r"(r) : "r"(v32));
#elif defined IS_AMD
#ifdef HAS_VBFE
__asm__ ("V_BFE_U32 %0, %1, 16, 8;" : "=v"(r) : "v"(v32));
#else
r = (v32 >> 16) & 0xff;
#endif
#else
r = (v32 >> 16) & 0xff;
#endif
return r;
}
DECLSPEC u32 unpack_v8d_from_v32_S (const u32 v32);
DECLSPEC u32 unpack_v8d_from_v32_S (const u32 v32)
{
u32 r;
#if defined IS_NV
asm ("bfe.u32 %0, %1, 24, 8;" : "=r"(r) : "r"(v32));
#elif defined IS_AMD
#ifdef HAS_VBFE
__asm__ ("V_BFE_U32 %0, %1, 24, 8;" : "=v"(r) : "v"(v32));
#else
r = (v32 >> 24) & 0xff;
#endif
#else
r = (v32 >> 24) & 0xff;
#endif
return r;
}

@ -299,21 +299,21 @@ __constant u32a c_sbox3[256] =
0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6
};
#define BF_ROUND(L,R,N) \
{ \
u32 tmp; \
\
const u32 r0 = hc_bfe_S ((L), 24, 8); \
const u32 r1 = hc_bfe_S ((L), 16, 8); \
const u32 r2 = hc_bfe_S ((L), 8, 8); \
const u32 r3 = hc_bfe_S ((L), 0, 8); \
\
tmp = S0[r0]; \
tmp += S1[r1]; \
tmp ^= S2[r2]; \
tmp += S3[r3]; \
\
(R) ^= tmp ^ P[(N)]; \
#define BF_ROUND(L,R,N) \
{ \
u32 tmp; \
\
const u32 r0 = unpack_v8d_from_v32_S ((L)); \
const u32 r1 = unpack_v8c_from_v32_S ((L)); \
const u32 r2 = unpack_v8b_from_v32_S ((L)); \
const u32 r3 = unpack_v8a_from_v32_S ((L)); \
\
tmp = S0[r0]; \
tmp += S1[r1]; \
tmp ^= S2[r2]; \
tmp += S3[r3]; \
\
(R) ^= tmp ^ P[(N)]; \
}
#define BF_ENCRYPT(L,R) \
@ -658,9 +658,6 @@ __kernel void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m032
L0 = 0;
R0 = 0;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 9; i++)
{
BF_ENCRYPT (L0, R0);
@ -723,9 +720,6 @@ __kernel void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m032
L0 = 0;
R0 = 0;
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0; i < 9; i++)
{
BF_ENCRYPT (L0, R0);

@ -308,51 +308,22 @@ __constant u32a c_pbox[18] =
0x9216d5d9, 0x8979fb1b
};
#ifdef IS_AMD
#define BF_ROUND(L,R,N) \
{ \
uchar4 c = as_uchar4 ((L)); \
\
u32 tmp; \
\
tmp = S0[c.s3]; \
tmp += S1[c.s2]; \
tmp ^= S2[c.s1]; \
tmp += S3[c.s0]; \
\
(R) ^= tmp ^ P[(N)]; \
#define BF_ROUND(L,R,N) \
{ \
u32 tmp; \
\
const u32 r0 = unpack_v8d_from_v32_S ((L)); \
const u32 r1 = unpack_v8c_from_v32_S ((L)); \
const u32 r2 = unpack_v8b_from_v32_S ((L)); \
const u32 r3 = unpack_v8a_from_v32_S ((L)); \
\
tmp = S0[r0]; \
tmp += S1[r1]; \
tmp ^= S2[r2]; \
tmp += S3[r3]; \
\
(R) ^= tmp ^ P[(N)]; \
}
#endif
#ifdef IS_NV
#define BF_ROUND(L,R,N) \
{ \
u32 tmp; \
\
tmp = S0[hc_bfe_S ((L), 24, 8)]; \
tmp += S1[hc_bfe_S ((L), 16, 8)]; \
tmp ^= S2[hc_bfe_S ((L), 8, 8)]; \
tmp += S3[hc_bfe_S ((L), 0, 8)]; \
\
(R) ^= tmp ^ P[(N)]; \
}
#endif
#ifdef IS_GENERIC
#define BF_ROUND(L,R,N) \
{ \
uchar4 c = as_uchar4 ((L)); \
\
u32 tmp; \
\
tmp = S0[c.s3]; \
tmp += S1[c.s2]; \
tmp ^= S2[c.s1]; \
tmp += S3[c.s0]; \
\
(R) ^= tmp ^ P[(N)]; \
}
#endif
#define BF_ENCRYPT(L,R) \
{ \
@ -512,7 +483,7 @@ DECLSPEC void sha1_transform (const u32 *w0, const u32 *w1, const u32 *w2, const
digest[4] += E;
}
__kernel void m09000_init (KERN_ATTR_TMPS (pwsafe2_tmp_t))
__kernel void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m09000_init (KERN_ATTR_TMPS (pwsafe2_tmp_t))
{
/**
* base
@ -626,10 +597,10 @@ __kernel void m09000_init (KERN_ATTR_TMPS (pwsafe2_tmp_t))
P[i] = c_pbox[i];
}
__local u32 S0_all[8][256];
__local u32 S1_all[8][256];
__local u32 S2_all[8][256];
__local u32 S3_all[8][256];
__local u32 S0_all[FIXED_LOCAL_SIZE][256];
__local u32 S1_all[FIXED_LOCAL_SIZE][256];
__local u32 S2_all[FIXED_LOCAL_SIZE][256];
__local u32 S3_all[FIXED_LOCAL_SIZE][256];
__local u32 *S0 = S0_all[lid];
__local u32 *S1 = S1_all[lid];
@ -731,7 +702,7 @@ __kernel void m09000_init (KERN_ATTR_TMPS (pwsafe2_tmp_t))
}
}
__kernel void m09000_loop (KERN_ATTR_TMPS (pwsafe2_tmp_t))
__kernel void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m09000_loop (KERN_ATTR_TMPS (pwsafe2_tmp_t))
{
/**
* base
@ -752,23 +723,21 @@ __kernel void m09000_loop (KERN_ATTR_TMPS (pwsafe2_tmp_t))
u32 P[18];
#pragma unroll
for (u32 i = 0; i < 18; i++)
{
P[i] = tmps[gid].P[i];
}
__local u32 S0_all[8][256];
__local u32 S1_all[8][256];
__local u32 S2_all[8][256];
__local u32 S3_all[8][256];
__local u32 S0_all[FIXED_LOCAL_SIZE][256];
__local u32 S1_all[FIXED_LOCAL_SIZE][256];
__local u32 S2_all[FIXED_LOCAL_SIZE][256];
__local u32 S3_all[FIXED_LOCAL_SIZE][256];
__local u32 *S0 = S0_all[lid];
__local u32 *S1 = S1_all[lid];
__local u32 *S2 = S2_all[lid];
__local u32 *S3 = S3_all[lid];
#pragma unroll
for (u32 i = 0; i < 256; i++)
{
S0[i] = tmps[gid].S0[i];

@ -318,18 +318,21 @@ __constant u32a c_pbox[18] =
0x9216d5d9, 0x8979fb1b
};
#define BF_ROUND(L,R,N) \
{ \
uchar4 c = as_uchar4 ((L)); \
\
u32 tmp; \
\
tmp = S0[c.s3]; \
tmp += S1[c.s2]; \
tmp ^= S2[c.s1]; \
tmp += S3[c.s0]; \
\
(R) ^= tmp ^ P[(N)]; \
#define BF_ROUND(L,R,N) \
{ \
u32 tmp; \
\
const u32 r0 = unpack_v8d_from_v32_S ((L)); \
const u32 r1 = unpack_v8c_from_v32_S ((L)); \
const u32 r2 = unpack_v8b_from_v32_S ((L)); \
const u32 r3 = unpack_v8a_from_v32_S ((L)); \
\
tmp = S0[r0]; \
tmp += S1[r1]; \
tmp ^= S2[r2]; \
tmp += S3[r3]; \
\
(R) ^= tmp ^ P[(N)]; \
}
#define BF_ENCRYPT(L,R) \
@ -582,7 +585,7 @@ __kernel void m18600_loop (KERN_ATTR_TMPS_ESALT (odf11_tmp_t, odf11_t))
}
}
__kernel void m18600_comp (KERN_ATTR_TMPS_ESALT (odf11_tmp_t, odf11_t))
__kernel void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m18600_comp (KERN_ATTR_TMPS_ESALT (odf11_tmp_t, odf11_t))
{
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
@ -612,10 +615,10 @@ __kernel void m18600_comp (KERN_ATTR_TMPS_ESALT (odf11_tmp_t, odf11_t))
P[i] = c_pbox[i] ^ ukey[i % 4];
}
__local u32 S0_all[8][256];
__local u32 S1_all[8][256];
__local u32 S2_all[8][256];
__local u32 S3_all[8][256];
__local u32 S0_all[FIXED_LOCAL_SIZE][256];
__local u32 S1_all[FIXED_LOCAL_SIZE][256];
__local u32 S2_all[FIXED_LOCAL_SIZE][256];
__local u32 S3_all[FIXED_LOCAL_SIZE][256];
__local u32 *S0 = S0_all[lid];
__local u32 *S1 = S1_all[lid];

@ -43,7 +43,7 @@
## Improvements
##
- Cracking bcrypt: Use a feedback from the OpenCL runtime to dynamically find out optimal thread count
- Cracking bcrypt and Password Safe v2: Use a feedback from the OpenCL runtime to dynamically find out optimal thread count
- Bitcoin Wallet: Be more user friendly by allowing a larger data range for ckey and public_key
- Building: Updated BUILD.md
- My Wallet: Added additional plaintext pattern used in newer versions

@ -1317,6 +1317,7 @@ typedef struct hc_device_param
bool has_vperm;
bool has_vadd3;
bool has_vbfe;
double spin_damp;

@ -80,11 +80,6 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
// based on the device_local_mem_size the reqd_work_group_size in the kernel is set to some value
// which is then is read from the opencl host in the kernel_preferred_wgs_multiple1/2/3 result.
// therefore we do not need to set module_kernel_threads_min/max except for CPU, where the threads are set to fixed 1.
// note we need to use device_param->device_local_mem_size - 4 because opencl jit returns with:
// Entry function '...' uses too much shared data (0xc004 bytes, 0xc000 max)
// on my development system. no clue where the 4 bytes are spent.
// I did some research on this and it seems to be related with the datatype.
// For example, if i used u8 instead, there's only 1 byte wasted.
u32 fixed_local_size = 0;
@ -94,7 +89,20 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
}
else
{
fixed_local_size = (device_param->device_local_mem_size - 4) / 4096;
u32 overhead = 0;
if (device_param->device_vendor_id == VENDOR_ID_NV)
{
// note we need to use device_param->device_local_mem_size - 4 because opencl jit returns with:
// Entry function '...' uses too much shared data (0xc004 bytes, 0xc000 max)
// on my development system. no clue where the 4 bytes are spent.
// I did some research on this and it seems to be related with the datatype.
// For example, if i used u8 instead, there's only 1 byte wasted.
overhead = 4;
}
fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096;
}
hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", fixed_local_size);

@ -70,18 +70,31 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
return tmp_size;
}
u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
{
const u32 kernel_threads_min = 8; // Blowfish
char *jit_build_options = NULL;
return kernel_threads_min;
}
u32 fixed_local_size = 0;
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_threads_max = 8; // Blowfish
if (device_param->device_type & CL_DEVICE_TYPE_CPU)
{
fixed_local_size = 1;
}
else
{
u32 overhead = 0;
if (device_param->device_vendor_id == VENDOR_ID_NV)
{
overhead = 4;
}
fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096;
}
hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", fixed_local_size);
return kernel_threads_max;
return jit_build_options;
}
bool module_outfile_check_disable (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
@ -183,14 +196,14 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_hook23 = MODULE_DEFAULT;
module_ctx->module_hook_salt_size = MODULE_DEFAULT;
module_ctx->module_hook_size = MODULE_DEFAULT;
module_ctx->module_jit_build_options = MODULE_DEFAULT;
module_ctx->module_jit_build_options = module_jit_build_options;
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
module_ctx->module_kernel_loops_max = MODULE_DEFAULT;
module_ctx->module_kernel_loops_min = MODULE_DEFAULT;
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
module_ctx->module_kernel_threads_min = module_kernel_threads_min;
module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
module_ctx->module_kern_type = module_kern_type;
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
module_ctx->module_opti_type = module_opti_type;

@ -62,18 +62,31 @@ typedef struct odf11
static const char *SIGNATURE_ODF = "$odf$";
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
{
const u32 kernel_threads_max = 8; // Blowfish enforced
char *jit_build_options = NULL;
return kernel_threads_max;
}
u32 fixed_local_size = 0;
u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_threads_min = 8; // Blowfish enforced
if (device_param->device_type & CL_DEVICE_TYPE_CPU)
{
fixed_local_size = 1;
}
else
{
u32 overhead = 0;
if (device_param->device_vendor_id == VENDOR_ID_NV)
{
overhead = 4;
}
fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096;
}
hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", fixed_local_size);
return kernel_threads_min;
return jit_build_options;
}
u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
@ -335,14 +348,14 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_hook23 = MODULE_DEFAULT;
module_ctx->module_hook_salt_size = MODULE_DEFAULT;
module_ctx->module_hook_size = MODULE_DEFAULT;
module_ctx->module_jit_build_options = MODULE_DEFAULT;
module_ctx->module_jit_build_options = module_jit_build_options;
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
module_ctx->module_kernel_loops_max = MODULE_DEFAULT;
module_ctx->module_kernel_loops_min = MODULE_DEFAULT;
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
module_ctx->module_kernel_threads_min = module_kernel_threads_min;
module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
module_ctx->module_kern_type = module_kern_type;
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
module_ctx->module_opti_type = module_opti_type;

@ -3958,6 +3958,10 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
const bool has_vadd3 = test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; __asm__ (\"V_ADD3_U32 %0, 0, 0, 0;\" : \"=v\"(r)); }");
device_param->has_vadd3 = has_vadd3;
const bool has_vbfe = test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; __asm__ (\"V_BFE_U32 %0, 0, 0, 0;\" : \"=v\"(r)); }");
device_param->has_vbfe = has_vbfe;
}
// device_available_mem
@ -4852,9 +4856,9 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx)
// we don't have sm_* on vendors not NV but it doesn't matter
#if defined (DEBUG)
build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll ", device_param->device_local_mem_type, device_param->platform_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->has_vperm, device_param->has_vadd3, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type);
build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll ", device_param->device_local_mem_type, device_param->platform_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type);
#else
build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll -w ", device_param->device_local_mem_type, device_param->platform_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->has_vperm, device_param->has_vadd3, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type);
build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll -w ", device_param->device_local_mem_type, device_param->platform_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type);
#endif
/*

Loading…
Cancel
Save