|
|
|
@ -319,6 +319,51 @@ CONSTANT_VK u32a c_pbox[18] =
|
|
|
|
|
0x9216d5d9, 0x8979fb1b
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Yes, works only with CUDA atm
|
|
|
|
|
|
|
|
|
|
#ifdef DYNAMIC_LOCAL
|
|
|
|
|
#define BCRYPT_AVOID_BANK_CONFLICTS
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#ifdef BCRYPT_AVOID_BANK_CONFLICTS
|
|
|
|
|
|
|
|
|
|
// access pattern: minimize bank ID based on thread ID but thread ID is not saved from computation
|
|
|
|
|
|
|
|
|
|
#define KEY32(lid,key) (((key) * FIXED_LOCAL_SIZE_COMP) + (lid))
|
|
|
|
|
|
|
|
|
|
DECLSPEC u32 GET_KEY32 (LOCAL_AS u32 *S, const u64 key)
|
|
|
|
|
{
|
|
|
|
|
const u64 lid = get_local_id (0);
|
|
|
|
|
|
|
|
|
|
return S[KEY32 (lid, key)];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
DECLSPEC void SET_KEY32 (LOCAL_AS u32 *S, const u64 key, const u32 val)
|
|
|
|
|
{
|
|
|
|
|
const u64 lid = get_local_id (0);
|
|
|
|
|
|
|
|
|
|
S[KEY32 (lid, key)] = val;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#undef KEY32
|
|
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
|
|
// access pattern: linear access with S offset already set to right offset based on thread ID saving it from compuation
|
|
|
|
|
// makes sense if there are not thread ID's (for instance on CPU)
|
|
|
|
|
|
|
|
|
|
DECLSPEC inline u32 GET_KEY32 (LOCAL_AS u32 *S, const u64 key)
|
|
|
|
|
{
|
|
|
|
|
return S[key];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
DECLSPEC inline void SET_KEY32 (LOCAL_AS u32 *S, const u64 key, const u32 val)
|
|
|
|
|
{
|
|
|
|
|
S[key] = val;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#define BF_ROUND(L,R,N) \
|
|
|
|
|
{ \
|
|
|
|
|
u32 tmp; \
|
|
|
|
@ -328,10 +373,10 @@ CONSTANT_VK u32a c_pbox[18] =
|
|
|
|
|
const u32 r2 = unpack_v8b_from_v32_S ((L)); \
|
|
|
|
|
const u32 r3 = unpack_v8a_from_v32_S ((L)); \
|
|
|
|
|
\
|
|
|
|
|
tmp = S0[r0]; \
|
|
|
|
|
tmp += S1[r1]; \
|
|
|
|
|
tmp ^= S2[r2]; \
|
|
|
|
|
tmp += S3[r3]; \
|
|
|
|
|
tmp = GET_KEY32 (S0, r0); \
|
|
|
|
|
tmp += GET_KEY32 (S1, r1); \
|
|
|
|
|
tmp ^= GET_KEY32 (S2, r2); \
|
|
|
|
|
tmp += GET_KEY32 (S3, r3); \
|
|
|
|
|
\
|
|
|
|
|
(R) ^= tmp ^ P[(N)]; \
|
|
|
|
|
}
|
|
|
|
@ -366,6 +411,10 @@ CONSTANT_VK u32a c_pbox[18] =
|
|
|
|
|
L ^= P[17]; \
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#ifdef DYNAMIC_LOCAL
|
|
|
|
|
extern __shared__ u32 S[];
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
DECLSPEC void hmac_sha1_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest)
|
|
|
|
|
{
|
|
|
|
|
digest[0] = ipad[0];
|
|
|
|
@ -586,7 +635,7 @@ KERNEL_FQ void m18600_loop (KERN_ATTR_TMPS_ESALT (odf11_tmp_t, odf11_t))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m18600_comp (KERN_ATTR_TMPS_ESALT (odf11_tmp_t, odf11_t))
|
|
|
|
|
KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE_COMP) m18600_comp (KERN_ATTR_TMPS_ESALT (odf11_tmp_t, odf11_t))
|
|
|
|
|
{
|
|
|
|
|
const u64 gid = get_global_id (0);
|
|
|
|
|
const u64 lid = get_local_id (0);
|
|
|
|
@ -616,22 +665,33 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m18600_comp (KERN_ATTR_TMPS_
|
|
|
|
|
P[i] = c_pbox[i] ^ ukey[i % 4];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE][256];
|
|
|
|
|
LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE][256];
|
|
|
|
|
LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256];
|
|
|
|
|
LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256];
|
|
|
|
|
#ifdef DYNAMIC_LOCAL
|
|
|
|
|
// from host
|
|
|
|
|
#else
|
|
|
|
|
LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE_COMP][256];
|
|
|
|
|
LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE_COMP][256];
|
|
|
|
|
LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE_COMP][256];
|
|
|
|
|
LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE_COMP][256];
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#ifdef BCRYPT_AVOID_BANK_CONFLICTS
|
|
|
|
|
LOCAL_AS u32 *S0 = S + (FIXED_LOCAL_SIZE_COMP * 256 * 0);
|
|
|
|
|
LOCAL_AS u32 *S1 = S + (FIXED_LOCAL_SIZE_COMP * 256 * 1);
|
|
|
|
|
LOCAL_AS u32 *S2 = S + (FIXED_LOCAL_SIZE_COMP * 256 * 2);
|
|
|
|
|
LOCAL_AS u32 *S3 = S + (FIXED_LOCAL_SIZE_COMP * 256 * 3);
|
|
|
|
|
#else
|
|
|
|
|
LOCAL_AS u32 *S0 = S0_all[lid];
|
|
|
|
|
LOCAL_AS u32 *S1 = S1_all[lid];
|
|
|
|
|
LOCAL_AS u32 *S2 = S2_all[lid];
|
|
|
|
|
LOCAL_AS u32 *S3 = S3_all[lid];
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
for (u32 i = 0; i < 256; i++)
|
|
|
|
|
{
|
|
|
|
|
S0[i] = c_sbox0[i];
|
|
|
|
|
S1[i] = c_sbox1[i];
|
|
|
|
|
S2[i] = c_sbox2[i];
|
|
|
|
|
S3[i] = c_sbox3[i];
|
|
|
|
|
SET_KEY32 (S0, i, c_sbox0[i]);
|
|
|
|
|
SET_KEY32 (S1, i, c_sbox1[i]);
|
|
|
|
|
SET_KEY32 (S2, i, c_sbox2[i]);
|
|
|
|
|
SET_KEY32 (S3, i, c_sbox3[i]);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
u32 L0 = 0;
|
|
|
|
@ -649,52 +709,52 @@ KERNEL_FQ void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE) m18600_comp (KERN_ATTR_TMPS_
|
|
|
|
|
{
|
|
|
|
|
BF_ENCRYPT (L0, R0);
|
|
|
|
|
|
|
|
|
|
S0[i + 0] = L0;
|
|
|
|
|
S0[i + 1] = R0;
|
|
|
|
|
SET_KEY32 (S0, i + 0, L0);
|
|
|
|
|
SET_KEY32 (S0, i + 1, R0);
|
|
|
|
|
|
|
|
|
|
BF_ENCRYPT (L0, R0);
|
|
|
|
|
|
|
|
|
|
S0[i + 2] = L0;
|
|
|
|
|
S0[i + 3] = R0;
|
|
|
|
|
SET_KEY32 (S0, i + 2, L0);
|
|
|
|
|
SET_KEY32 (S0, i + 3, R0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (u32 i = 0; i < 256; i += 4)
|
|
|
|
|
{
|
|
|
|
|
BF_ENCRYPT (L0, R0);
|
|
|
|
|
|
|
|
|
|
S1[i + 0] = L0;
|
|
|
|
|
S1[i + 1] = R0;
|
|
|
|
|
SET_KEY32 (S1, i + 0, L0);
|
|
|
|
|
SET_KEY32 (S1, i + 1, R0);
|
|
|
|
|
|
|
|
|
|
BF_ENCRYPT (L0, R0);
|
|
|
|
|
|
|
|
|
|
S1[i + 2] = L0;
|
|
|
|
|
S1[i + 3] = R0;
|
|
|
|
|
SET_KEY32 (S1, i + 2, L0);
|
|
|
|
|
SET_KEY32 (S1, i + 3, R0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (u32 i = 0; i < 256; i += 4)
|
|
|
|
|
{
|
|
|
|
|
BF_ENCRYPT (L0, R0);
|
|
|
|
|
|
|
|
|
|
S2[i + 0] = L0;
|
|
|
|
|
S2[i + 1] = R0;
|
|
|
|
|
SET_KEY32 (S2, i + 0, L0);
|
|
|
|
|
SET_KEY32 (S2, i + 1, R0);
|
|
|
|
|
|
|
|
|
|
BF_ENCRYPT (L0, R0);
|
|
|
|
|
|
|
|
|
|
S2[i + 2] = L0;
|
|
|
|
|
S2[i + 3] = R0;
|
|
|
|
|
SET_KEY32 (S2, i + 2, L0);
|
|
|
|
|
SET_KEY32 (S2, i + 3, R0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (u32 i = 0; i < 256; i += 4)
|
|
|
|
|
{
|
|
|
|
|
BF_ENCRYPT (L0, R0);
|
|
|
|
|
|
|
|
|
|
S3[i + 0] = L0;
|
|
|
|
|
S3[i + 1] = R0;
|
|
|
|
|
SET_KEY32 (S3, i + 0, L0);
|
|
|
|
|
SET_KEY32 (S3, i + 1, R0);
|
|
|
|
|
|
|
|
|
|
BF_ENCRYPT (L0, R0);
|
|
|
|
|
|
|
|
|
|
S3[i + 2] = L0;
|
|
|
|
|
S3[i + 3] = R0;
|
|
|
|
|
SET_KEY32 (S3, i + 2, L0);
|
|
|
|
|
SET_KEY32 (S3, i + 3, R0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
GLOBAL_AS const odf11_t *es = &esalt_bufs[DIGESTS_OFFSET];
|
|
|
|
|