From 6c22de104e04cdf24b193026f1fa5f4b27f3489c Mon Sep 17 00:00:00 2001 From: jsteube Date: Mon, 21 Dec 2015 21:00:52 +0100 Subject: [PATCH] Slightly increase performance of blowfish-based algorithms for NV --- OpenCL/m03200.cl | 18 +++++++++++++++++- OpenCL/m09000.cl | 9 ++++----- OpenCL/types_ocl.c | 14 ++++++++++++-- 3 files changed, 33 insertions(+), 8 deletions(-) diff --git a/OpenCL/m03200.cl b/OpenCL/m03200.cl index f62fd4f7d..dd5c522eb 100644 --- a/OpenCL/m03200.cl +++ b/OpenCL/m03200.cl @@ -294,11 +294,12 @@ __constant u32 c_sbox3[256] = 0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6 }; +#ifdef IS_AMD #define BF_ROUND(L,R,N) \ { \ uchar4 c = as_uchar4 ((L)); \ \ - u32 tmp; \ + u32 tmp; \ \ tmp = S0[c.s3]; \ tmp += S1[c.s2]; \ @@ -307,6 +308,21 @@ __constant u32 c_sbox3[256] = \ (R) ^= tmp ^ P[(N)]; \ } +#endif + +#ifdef IS_NV +#define BF_ROUND(L,R,N) \ +{ \ + u32 tmp; \ + \ + tmp = S0[__bfe ((L), 24, 8)]; \ + tmp += S1[__bfe ((L), 16, 8)]; \ + tmp ^= S2[__bfe ((L), 8, 8)]; \ + tmp += S3[__bfe ((L), 0, 8)]; \ + \ + (R) ^= tmp ^ P[(N)]; \ +} +#endif #define BF_ENCRYPT(L,R) \ { \ diff --git a/OpenCL/m09000.cl b/OpenCL/m09000.cl index 8cbf21e88..1e4080b53 100644 --- a/OpenCL/m09000.cl +++ b/OpenCL/m09000.cl @@ -294,7 +294,6 @@ __constant u32 c_sbox3[256] = 0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6 }; - #ifdef IS_AMD #define BF_ROUND(L,R,N) \ { \ @@ -316,10 +315,10 @@ __constant u32 c_sbox3[256] = { \ u32 tmp; \ \ - tmp = S0[((L) >> 24) & 0xff]; \ - tmp += S1[((L) >> 16) & 0xff]; \ - tmp ^= S2[((L) >> 8) & 0xff]; \ - tmp += S3[((L) >> 0) & 0xff]; \ + tmp = S0[__bfe ((L), 24, 8)]; \ + tmp += S1[__bfe ((L), 16, 8)]; \ + tmp ^= S2[__bfe ((L), 8, 8)]; \ + tmp += S3[__bfe ((L), 0, 8)]; \ \ (R) ^= tmp ^ P[(N)]; \ } diff --git a/OpenCL/types_ocl.c b/OpenCL/types_ocl.c index a091e5d57..74d00b957 100644 --- a/OpenCL/types_ocl.c +++ b/OpenCL/types_ocl.c @@ -22,15 +22,25 @@ static inline u64 swap64 (const u64 v) #endif #ifdef IS_NV -static inline u32 __byte_perm (const u32 a, const 
u32 b, const u32 s) +static inline u32 __byte_perm (const u32 a, const u32 b, const u32 c) { u32 r; - asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(s)); + asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c)); return r; } +static inline u32 __bfe (const u32 a, const u32 b, const u32 c) +{ + u32 r; + + asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c)); + + return r; +} + + #if CUDA_ARCH >= 350 static inline u32 amd_bytealign (const u32 a, const u32 b, const u32 c)