Small performance boost for bcrypt on CPU

pull/1646/head
Jens Steube 6 years ago
parent a03b5dae57
commit 1c280e4a6e

@ -286,59 +286,17 @@ __constant u32a c_sbox3[256] =
0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6
};
#ifdef IS_AMD
#define BF_ROUND(L,R,N) \
{ \
uchar4 c = as_uchar4 ((L)); \
\
u32 tmp; \
\
tmp = S0[c.s3]; \
tmp += S1[c.s2]; \
tmp ^= S2[c.s1]; \
tmp += S3[c.s0]; \
\
(R) ^= tmp ^ P[(N)]; \
}
#endif
#ifdef IS_NV
#define BF_ROUND(L,R,N) \
{ \
u32 tmp; \
#define BF_ROUND(L,R,N) \
{ \
u32 tmp; \
\
tmp = S0[hc_bfe ((L), 24, 8)]; \
tmp += S1[hc_bfe ((L), 16, 8)]; \
tmp ^= S2[hc_bfe ((L), 8, 8)]; \
tmp += S3[hc_bfe ((L), 0, 8)]; \
tmp = S0[hc_bfe_S ((L), 24, 8)]; \
tmp += S1[hc_bfe_S ((L), 16, 8)]; \
tmp ^= S2[hc_bfe_S ((L), 8, 8)]; \
tmp += S3[hc_bfe_S ((L), 0, 8)]; \
\
(R) ^= tmp ^ P[(N)]; \
(R) ^= tmp ^ P[(N)]; \
}
#endif
#ifdef IS_GENERIC
#define BF_ROUND(L,R,N) \
{ \
uchar4 c = as_uchar4 ((L)); \
\
u32 tmp; \
\
tmp = S0[c.s3]; \
tmp += S1[c.s2]; \
tmp ^= S2[c.s1]; \
tmp += S3[c.s0]; \
\
(R) ^= tmp ^ P[(N)]; \
}
#endif
// temporary hack for Apple Iris GPUs (with as little performance drop as possible)
#if defined (IS_APPLE) && defined (IS_GPU)
#define TMP_TYPE u32
#else
#define TMP_TYPE u32
#endif
#define BF_ENCRYPT(L,R) \
{ \
@ -361,7 +319,7 @@ __constant u32a c_sbox3[256] =
BF_ROUND (L, R, 15); \
BF_ROUND (R, L, 16); \
\
TMP_TYPE tmp; \
u32 tmp; \
\
tmp = R; \
R = L; \
@ -370,24 +328,23 @@ __constant u32a c_sbox3[256] =
L ^= P[17]; \
}
DECLSPEC void expand_key (u32 *E, const u32 *W, const u32 len)
DECLSPEC void expand_key (u32 *E, u32 *W, const int len)
{
u8 *E_cur = (u8 *) E;
u8 *E_stop = E_cur + 72;
u8 *E_ptr = (u8 *) E;
u8 *W_ptr = (u8 *) W;
while (E_cur < E_stop)
for (int pos = 0; pos < 72; pos++) // pos++ is not a bug, we actually want that zero byte here
{
u8 *W_cur = (u8 *) W;
u8 *W_stop = W_cur + len;
const int left = 72 - pos;
while (W_cur < W_stop)
{
*E_cur++ = *W_cur++;
const int sz = (len < left) ? len : left; // should be MIN()
if (E_cur == E_stop) return;
for (int i = 0; i < sz; i++)
{
E_ptr[pos + i] = W_ptr[i];
}
*E_cur++ = 0;
pos += sz;
}
}
@ -425,7 +382,7 @@ __kernel void __attribute__((reqd_work_group_size(8, 1, 1))) m03200_init (__glob
w[16] = pws[gid].i[16];
w[17] = pws[gid].i[17];
u32 E[18];
u32 E[18] = { 0 };
expand_key (E, w, pw_len);

Loading…
Cancel
Save