Slightly increase performance of Blowfish-based algorithms for NV

pull/30/head
jsteube 9 years ago
parent 747c4a8d6a
commit 6c22de104e

@ -294,11 +294,12 @@ __constant u32 c_sbox3[256] =
0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6
};
#ifdef IS_AMD
#define BF_ROUND(L,R,N) \
{ \
uchar4 c = as_uchar4 ((L)); \
\
u32 tmp; \
u32 tmp; \
\
tmp = S0[c.s3]; \
tmp += S1[c.s2]; \
@ -307,6 +308,21 @@ __constant u32 c_sbox3[256] =
\
(R) ^= tmp ^ P[(N)]; \
}
#endif
#ifdef IS_NV
/*
 * BF_ROUND (IS_NV build): mixes one round value into R.
 *
 * The four bytes of L, taken from bits 24..31 down to bits 0..7 via __bfe,
 * index S0, S1, S2 and S3 respectively. The lookups are combined as
 * ((S0 + S1) ^ S2) + S3 in u32 arithmetic, and the result, together with
 * P[N], is XORed into R.
 *
 * S0..S3 and P are not parameters; they must be visible at the expansion
 * site. L is evaluated four times, so it should be free of side effects.
 */
#define BF_ROUND(L,R,N) \
{ \
  (R) ^= P[(N)] ^ (u32) (((S0[__bfe ((L), 24, 8)] \
                         + S1[__bfe ((L), 16, 8)]) \
                         ^ S2[__bfe ((L),  8, 8)]) \
                         + S3[__bfe ((L),  0, 8)]); \
}
#endif
#define BF_ENCRYPT(L,R) \
{ \

@ -294,7 +294,6 @@ __constant u32 c_sbox3[256] =
0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6
};
#ifdef IS_AMD
#define BF_ROUND(L,R,N) \
{ \
@ -316,10 +315,10 @@ __constant u32 c_sbox3[256] =
{ \
u32 tmp; \
\
tmp = S0[((L) >> 24) & 0xff]; \
tmp += S1[((L) >> 16) & 0xff]; \
tmp ^= S2[((L) >> 8) & 0xff]; \
tmp += S3[((L) >> 0) & 0xff]; \
tmp = S0[__bfe ((L), 24, 8)]; \
tmp += S1[__bfe ((L), 16, 8)]; \
tmp ^= S2[__bfe ((L), 8, 8)]; \
tmp += S3[__bfe ((L), 0, 8)]; \
\
(R) ^= tmp ^ P[(N)]; \
}

@ -22,15 +22,25 @@ static inline u64 swap64 (const u64 v)
#endif
#ifdef IS_NV
static inline u32 __byte_perm (const u32 a, const u32 b, const u32 s)
static inline u32 __byte_perm (const u32 a, const u32 b, const u32 c)
{
u32 r;
asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(s));
asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));
return r;
}
/*
 * Wrapper around the PTX "bfe.u32" instruction (NV builds only).
 *
 * Passes a, b and c as the instruction's three source operands, in that
 * order, and returns the value written to its destination register.
 *
 * NOTE(review): per the PTX ISA, bfe.u32 extracts an unsigned bit field from
 * a, starting at bit position b and c bits wide - confirm against the PTX
 * documentation. The Blowfish round macro calls it as __bfe (x, 24, 8),
 * __bfe (x, 16, 8), ... to pick out individual bytes of x.
 */
static inline u32 __bfe (const u32 a, const u32 b, const u32 c)
{
u32 r;
/* %0 is the output register (r); %1..%3 are the inputs a, b, c. */
asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));
return r;
}
#if CUDA_ARCH >= 350
static inline u32 amd_bytealign (const u32 a, const u32 b, const u32 c)

Loading…
Cancel
Save