|
|
|
@ -308,51 +308,22 @@ __constant u32a c_pbox[18] =
|
|
|
|
|
0x9216d5d9, 0x8979fb1b
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
#ifdef IS_AMD
|
|
|
|
|
#define BF_ROUND(L,R,N) \
|
|
|
|
|
{ \
|
|
|
|
|
uchar4 c = as_uchar4 ((L)); \
|
|
|
|
|
\
|
|
|
|
|
u32 tmp; \
|
|
|
|
|
\
|
|
|
|
|
tmp = S0[c.s3]; \
|
|
|
|
|
tmp += S1[c.s2]; \
|
|
|
|
|
tmp ^= S2[c.s1]; \
|
|
|
|
|
tmp += S3[c.s0]; \
|
|
|
|
|
\
|
|
|
|
|
(R) ^= tmp ^ P[(N)]; \
|
|
|
|
|
#define BF_ROUND(L,R,N) \
|
|
|
|
|
{ \
|
|
|
|
|
u32 tmp; \
|
|
|
|
|
\
|
|
|
|
|
const u32 r0 = unpack_v8d_from_v32_S ((L)); \
|
|
|
|
|
const u32 r1 = unpack_v8c_from_v32_S ((L)); \
|
|
|
|
|
const u32 r2 = unpack_v8b_from_v32_S ((L)); \
|
|
|
|
|
const u32 r3 = unpack_v8a_from_v32_S ((L)); \
|
|
|
|
|
\
|
|
|
|
|
tmp = S0[r0]; \
|
|
|
|
|
tmp += S1[r1]; \
|
|
|
|
|
tmp ^= S2[r2]; \
|
|
|
|
|
tmp += S3[r3]; \
|
|
|
|
|
\
|
|
|
|
|
(R) ^= tmp ^ P[(N)]; \
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#ifdef IS_NV
|
|
|
|
|
#define BF_ROUND(L,R,N) \
|
|
|
|
|
{ \
|
|
|
|
|
u32 tmp; \
|
|
|
|
|
\
|
|
|
|
|
tmp = S0[hc_bfe_S ((L), 24, 8)]; \
|
|
|
|
|
tmp += S1[hc_bfe_S ((L), 16, 8)]; \
|
|
|
|
|
tmp ^= S2[hc_bfe_S ((L), 8, 8)]; \
|
|
|
|
|
tmp += S3[hc_bfe_S ((L), 0, 8)]; \
|
|
|
|
|
\
|
|
|
|
|
(R) ^= tmp ^ P[(N)]; \
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#ifdef IS_GENERIC
|
|
|
|
|
#define BF_ROUND(L,R,N) \
|
|
|
|
|
{ \
|
|
|
|
|
uchar4 c = as_uchar4 ((L)); \
|
|
|
|
|
\
|
|
|
|
|
u32 tmp; \
|
|
|
|
|
\
|
|
|
|
|
tmp = S0[c.s3]; \
|
|
|
|
|
tmp += S1[c.s2]; \
|
|
|
|
|
tmp ^= S2[c.s1]; \
|
|
|
|
|
tmp += S3[c.s0]; \
|
|
|
|
|
\
|
|
|
|
|
(R) ^= tmp ^ P[(N)]; \
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#define BF_ENCRYPT(L,R) \
|
|
|
|
|
{ \
|
|
|
|
@ -512,7 +483,7 @@ DECLSPEC void sha1_transform (const u32 *w0, const u32 *w1, const u32 *w2, const
|
|
|
|
|
digest[4] += E;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
__kernel void m09000_init (KERN_ATTR_TMPS (pwsafe2_tmp_t))
|
|
|
|
|
__kernel void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m09000_init (KERN_ATTR_TMPS (pwsafe2_tmp_t))
|
|
|
|
|
{
|
|
|
|
|
/**
|
|
|
|
|
* base
|
|
|
|
@ -626,10 +597,10 @@ __kernel void m09000_init (KERN_ATTR_TMPS (pwsafe2_tmp_t))
|
|
|
|
|
P[i] = c_pbox[i];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
__local u32 S0_all[8][256];
|
|
|
|
|
__local u32 S1_all[8][256];
|
|
|
|
|
__local u32 S2_all[8][256];
|
|
|
|
|
__local u32 S3_all[8][256];
|
|
|
|
|
__local u32 S0_all[FIXED_LOCAL_SIZE][256];
|
|
|
|
|
__local u32 S1_all[FIXED_LOCAL_SIZE][256];
|
|
|
|
|
__local u32 S2_all[FIXED_LOCAL_SIZE][256];
|
|
|
|
|
__local u32 S3_all[FIXED_LOCAL_SIZE][256];
|
|
|
|
|
|
|
|
|
|
__local u32 *S0 = S0_all[lid];
|
|
|
|
|
__local u32 *S1 = S1_all[lid];
|
|
|
|
@ -731,7 +702,7 @@ __kernel void m09000_init (KERN_ATTR_TMPS (pwsafe2_tmp_t))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
__kernel void m09000_loop (KERN_ATTR_TMPS (pwsafe2_tmp_t))
|
|
|
|
|
__kernel void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m09000_loop (KERN_ATTR_TMPS (pwsafe2_tmp_t))
|
|
|
|
|
{
|
|
|
|
|
/**
|
|
|
|
|
* base
|
|
|
|
@ -752,23 +723,21 @@ __kernel void m09000_loop (KERN_ATTR_TMPS (pwsafe2_tmp_t))
|
|
|
|
|
|
|
|
|
|
u32 P[18];
|
|
|
|
|
|
|
|
|
|
#pragma unroll
|
|
|
|
|
for (u32 i = 0; i < 18; i++)
|
|
|
|
|
{
|
|
|
|
|
P[i] = tmps[gid].P[i];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
__local u32 S0_all[8][256];
|
|
|
|
|
__local u32 S1_all[8][256];
|
|
|
|
|
__local u32 S2_all[8][256];
|
|
|
|
|
__local u32 S3_all[8][256];
|
|
|
|
|
__local u32 S0_all[FIXED_LOCAL_SIZE][256];
|
|
|
|
|
__local u32 S1_all[FIXED_LOCAL_SIZE][256];
|
|
|
|
|
__local u32 S2_all[FIXED_LOCAL_SIZE][256];
|
|
|
|
|
__local u32 S3_all[FIXED_LOCAL_SIZE][256];
|
|
|
|
|
|
|
|
|
|
__local u32 *S0 = S0_all[lid];
|
|
|
|
|
__local u32 *S1 = S1_all[lid];
|
|
|
|
|
__local u32 *S2 = S2_all[lid];
|
|
|
|
|
__local u32 *S3 = S3_all[lid];
|
|
|
|
|
|
|
|
|
|
#pragma unroll
|
|
|
|
|
for (u32 i = 0; i < 256; i++)
|
|
|
|
|
{
|
|
|
|
|
S0[i] = tmps[gid].S0[i];
|
|
|
|
|