From c29c7c093fbedca5c7e6eb931a614cca923644b7 Mon Sep 17 00:00:00 2001 From: jsteube Date: Wed, 16 Dec 2015 11:11:31 +0100 Subject: [PATCH] Fix use of LOP3 --- OpenCL/types_ocl.c | 4 +- include/kernel_functions.c | 132 +++++++++++++++++++------------------ 2 files changed, 71 insertions(+), 65 deletions(-) diff --git a/OpenCL/types_ocl.c b/OpenCL/types_ocl.c index 7347ea169..b4a6978ef 100644 --- a/OpenCL/types_ocl.c +++ b/OpenCL/types_ocl.c @@ -22,11 +22,11 @@ static u64 swap64 (const u64 v) #endif #ifdef IS_NV -static u32 __byte_perm (const u32 a, const u32 b, const u32 c) +static u32 __byte_perm (const u32 a, const u32 b, const u32 s) { u32 r; - asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c)); + asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(s)); return r; } diff --git a/include/kernel_functions.c b/include/kernel_functions.c index 965440936..05352e099 100644 --- a/include/kernel_functions.c +++ b/include/kernel_functions.c @@ -4,11 +4,9 @@ */ #if defined _MD4_ || defined _DCC2_ || defined _NETNTLMV2_ || defined _KRB5PA_ || defined _MS_DRSR_ -/** - * MD4 Functions - */ -#if __CUDA_ARCH__ >= 500 +#ifdef IS_NV +#if CUDA_ARCH >= 500 #define MD4_F(x,y,z) lut3_ca ((x), (y), (z)) #define MD4_G(x,y,z) lut3_e8 ((x), (y), (z)) #define MD4_H(x,y,z) lut3_96 ((x), (y), (z)) @@ -21,15 +19,20 @@ #define MD4_H1(x,y,z) ((tmp2 = (x) ^ (y)) ^ (z)) #define MD4_H2(x,y,z) ((x) ^ tmp2) #endif - -#ifdef IS_AMD -#define MD4_Fo(x,y,z) (bitselect ((z), (y), (x))) -#define MD4_Go(x,y,z) (bitselect ((x), (y), ((x) ^ (z)))) -#else #define MD4_Fo(x,y,z) (MD4_F((x), (y), (z))) #define MD4_Go(x,y,z) (MD4_G((x), (y), (z))) #endif +#ifdef IS_AMD +#define MD4_F(x,y,z) (((x) & (y)) | ((~(x)) & (z))) +#define MD4_G(x,y,z) (((x) & (y)) | ((x) & (z)) | ((y) & (z))) +#define MD4_H(x,y,z) ((x) ^ (y) ^ (z)) +#define MD4_H1(x,y,z) ((tmp2 = (x) ^ (y)) ^ (z)) +#define MD4_H2(x,y,z) ((x) ^ tmp2) +#define MD4_Fo(x,y,z) (bitselect ((z), (y), (x))) +#define MD4_Go(x,y,z) (bitselect ((x), (y), ((x) ^ (z)))) +#endif + #define MD4_STEP(f,a,b,c,d,x,K,s) \ { \ a += K; \ @@ -48,40 +51,36 @@ #endif #if defined _MD5_ || defined _MD5H_ || defined _SAPB_ || defined _OLDOFFICE01_ || defined _WPA_ || defined _MD5_SHA1_ || defined _SHA1_MD5_ || defined _NETNTLMV2_ || defined _KRB5PA_ || defined _PBKDF2_MD5_ -/** - * MD5 Functions - */ -#if __CUDA_ARCH__ >= 500 +#ifdef IS_NV +#if CUDA_ARCH >= 500 #define MD5_F(x,y,z) lut3_ca ((x), (y), (z)) #define MD5_G(x,y,z) lut3_e4 ((x), (y), (z)) #define MD5_H(x,y,z) lut3_96 ((x), (y), (z)) #define MD5_H1(x,y,z) lut3_96 ((x), (y), (z)) #define MD5_H2(x,y,z) lut3_96 ((x), (y), (z)) +#define MD5_I(x,y,z) lut3_39 ((x), (y), (z)) #else #define MD5_F(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) #define MD5_G(x,y,z) ((y) ^ ((z) & ((x) ^ (y)))) #define MD5_H(x,y,z) ((x) ^ (y) ^ (z)) #define MD5_H1(x,y,z) ((tmp2 = (x) ^ (y)) ^ (z)) #define MD5_H2(x,y,z) ((x) ^ tmp2) -#endif - -#ifdef IS_AMD -#define MD5_I(x,y,z) (bitselect (0xffffffffU, (x), (z)) ^ (y)) -#else -#if __CUDA_ARCH__ >= 500 -#define MD5_I(x,y,z) lut3_39 ((x), (y), (z)) -#else #define MD5_I(x,y,z) ((y) ^ ((x) | ~(z))) #endif +#define MD5_Fo(x,y,z) (MD5_F((x), (y), (z))) +#define MD5_Go(x,y,z) (MD5_G((x), (y), (z))) #endif #ifdef IS_AMD +#define MD5_F(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) +#define MD5_G(x,y,z) ((y) ^ ((z) & ((x) ^ (y)))) +#define MD5_H(x,y,z) ((x) ^ (y) ^ (z)) +#define MD5_H1(x,y,z) ((tmp2 = (x) ^ (y)) ^ (z)) +#define MD5_H2(x,y,z) ((x) ^ tmp2) +#define MD5_I(x,y,z) (bitselect (0xffffffffU, (x), (z)) ^ (y)) #define MD5_Fo(x,y,z) (bitselect ((z), (y), (x))) #define MD5_Go(x,y,z) (bitselect ((y), (x), (z))) -#else -#define MD5_Fo(x,y,z) (MD5_F((x), (y), (z))) -#define MD5_Go(x,y,z) (MD5_G((x), (y), (z))) #endif #define MD5_STEP(f,a,b,c,d,x,K,s) \ @@ -108,7 +107,8 @@ * SHA1 Functions */ -#if __CUDA_ARCH__ >= 500 +#ifdef IS_NV +#if CUDA_ARCH >= 500 #define SHA1_F0(x,y,z) lut3_ca ((x), (y), (z)) #define SHA1_F1(x,y,z) lut3_96 ((x), (y), (z)) #define SHA1_F2(x,y,z) lut3_e8 ((x), (y), (z)) @@ -117,15 +117,18 @@ #define SHA1_F1(x,y,z) ((x) ^ (y) ^ (z)) #define SHA1_F2(x,y,z) (((x) & (y)) | ((z) & ((x) ^ (y)))) #endif - -#ifdef IS_AMD -#define SHA1_F0o(x,y,z) (bitselect ((z), (y), (x))) -#define SHA1_F2o(x,y,z) (bitselect ((x), (y), ((x) ^ (z)))) -#else #define SHA1_F0o(x,y,z) (SHA1_F0 ((x), (y), (z))) #define SHA1_F2o(x,y,z) (SHA1_F2 ((x), (y), (z))) #endif +#ifdef IS_AMD +#define SHA1_F0(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) +#define SHA1_F1(x,y,z) ((x) ^ (y) ^ (z)) +#define SHA1_F2(x,y,z) (((x) & (y)) | ((z) & ((x) ^ (y)))) +#define SHA1_F0o(x,y,z) (bitselect ((z), (y), (x))) +#define SHA1_F2o(x,y,z) (bitselect ((x), (y), ((x) ^ (z)))) +#endif + #define SHA1_STEP(f,a,b,c,d,e,x) \ { \ e += K; \ @@ -166,9 +169,6 @@ #endif #if defined _SHA256_ || defined _PDF17L8_ || defined _SEVEN_ZIP_ || defined _ANDROIDFDE_ || defined _CLOUDKEY_ || defined _SCRYPT_ || defined _PBKDF2_SHA256_ || defined _SHA256_SHA1_ || defined _MS_DRSR_ -/** - * SHA256 Functions - */ #define SHIFT_RIGHT_32(x,n) ((x) >> (n)) @@ -177,22 +177,25 @@ #define SHA256_S2(x) (rotl32 ((x), 30u) ^ rotl32 ((x), 19u) ^ rotl32 ((x), 10u)) #define SHA256_S3(x) (rotl32 ((x), 26u) ^ rotl32 ((x), 21u) ^ rotl32 ((x), 7u)) -#if __CUDA_ARCH__ >= 500 +#ifdef IS_NV +#if CUDA_ARCH >= 500 #define SHA256_F0(x,y,z) lut3_e8 ((x), (y), (z)) #define SHA256_F1(x,y,z) lut3_ca ((x), (y), (z)) #else #define SHA256_F0(x,y,z) (((x) & (y)) | ((z) & ((x) ^ (y)))) #define SHA256_F1(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) #endif - -#ifdef IS_AMD -#define SHA256_F0o(x,y,z) (bitselect ((x), (y), ((x) ^ (z)))) -#define SHA256_F1o(x,y,z) (bitselect ((z), (y), (x))) -#else #define SHA256_F0o(x,y,z) (SHA256_F0 ((x), (y), (z))) #define SHA256_F1o(x,y,z) (SHA256_F1 ((x), (y), (z))) #endif +#ifdef IS_AMD +#define SHA256_F0(x,y,z) (((x) & (y)) | ((z) & ((x) ^ (y)))) +#define SHA256_F1(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) +#define SHA256_F0o(x,y,z) (bitselect ((x), (y), ((x) ^ (z)))) +#define SHA256_F1o(x,y,z) (bitselect ((z), (y), (x))) +#endif + #define SHA256_STEP(F0,F1,a,b,c,d,e,f,g,h,x,K) \ { \ h += K; \ @@ -205,13 +208,10 @@ } #define SHA256_EXPAND(x,y,z,w) (SHA256_S1 (x) + y + SHA256_S0 (z) + w) + #endif - #if defined _SHA384_ || defined _PDF17L8_ -/** - * SHA384 Functions - */ #define SHIFT_RIGHT_64(x,n) ((x) >> (n)) @@ -223,17 +223,19 @@ #define SHA384_F0(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) #define SHA384_F1(x,y,z) (((x) & (y)) | ((z) & ((x) ^ (y)))) -#ifdef IS_AMD -#define SHA384_F0o(x,y,z) (bitselect ((z), (y), (x))) -#define SHA384_F1o(x,y,z) (bitselect ((x), (y), ((x) ^ (z)))) -#else +#ifdef IS_NV #define SHA384_F0o(x,y,z) (SHA384_F0 ((x), (y), (z))) #define SHA384_F1o(x,y,z) (SHA384_F1 ((x), (y), (z))) #endif +#ifdef IS_AMD +#define SHA384_F0o(x,y,z) (bitselect ((z), (y), (x))) +#define SHA384_F1o(x,y,z) (bitselect ((x), (y), ((x) ^ (z)))) +#endif + #define SHA384_STEP(F0,F1,a,b,c,d,e,f,g,h,x,K) \ { \ - u64 temp0; \ + u64 temp0; \ temp0 = K; \ temp0 += x; \ temp0 += h; \ @@ -263,17 +265,19 @@ #define SHA512_F0(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) #define SHA512_F1(x,y,z) (((x) & (y)) | ((z) & ((x) ^ (y)))) -#ifdef IS_AMD -#define SHA512_F0o(x,y,z) (bitselect ((z), (y), (x))) -#define SHA512_F1o(x,y,z) (bitselect ((x), (y), ((x) ^ (z)))) -#else +#ifdef IS_NV #define SHA512_F0o(x,y,z) (SHA512_F0 ((x), (y), (z))) #define SHA512_F1o(x,y,z) (SHA512_F1 ((x), (y), (z))) #endif +#ifdef IS_AMD +#define SHA512_F0o(x,y,z) (bitselect ((z), (y), (x))) +#define SHA512_F1o(x,y,z) (bitselect ((x), (y), ((x) ^ (z)))) +#endif + #define SHA512_STEP(F0,F1,a,b,c,d,e,f,g,h,x,K) \ { \ - u64 temp0; \ + u64 temp0; \ temp0 = K; \ temp0 += x; \ temp0 += h; \ @@ -294,11 +298,9 @@ #endif #ifdef _RIPEMD160_ -/** - * RIPEMD160 Functions - */ -#if __CUDA_ARCH__ >= 500 +#ifdef IS_NV +#if CUDA_ARCH >= 500 #define RIPEMD160_F(x,y,z) lut3_96 ((x), (y), (z)) #define RIPEMD160_G(x,y,z) lut3_ca ((x), (y), (z)) #define RIPEMD160_H(x,y,z) lut3_59 ((x), (y), (z)) @@ -311,15 +313,20 @@ #define RIPEMD160_I(x,y,z) ((y) ^ ((z) & ((x) ^ (y)))) /* z ? x : y */ #define RIPEMD160_J(x,y,z) ((x) ^ ((y) | ~(z))) #endif - -#ifdef IS_AMD -#define RIPEMD160_Go(x,y,z) (bitselect ((z), (y), (x))) -#define RIPEMD160_Io(x,y,z) (bitselect ((y), (x), (z))) -#else #define RIPEMD160_Go(x,y,z) (RIPEMD160_G ((x), (y), (z))) #define RIPEMD160_Io(x,y,z) (RIPEMD160_I ((x), (y), (z))) #endif +#ifdef IS_AMD +#define RIPEMD160_F(x,y,z) ((x) ^ (y) ^ (z)) +#define RIPEMD160_G(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) /* x ? y : z */ +#define RIPEMD160_H(x,y,z) (((x) | ~(y)) ^ (z)) +#define RIPEMD160_I(x,y,z) ((y) ^ ((z) & ((x) ^ (y)))) /* z ? x : y */ +#define RIPEMD160_J(x,y,z) ((x) ^ ((y) | ~(z))) +#define RIPEMD160_Go(x,y,z) (bitselect ((z), (y), (x))) +#define RIPEMD160_Io(x,y,z) (bitselect ((y), (x), (z))) +#endif + #define RIPEMD160_STEP(f,a,b,c,d,e,x,K,s) \ { \ a += K; \ @@ -343,4 +350,3 @@ } #endif -