diff --git a/OpenCL/inc_common.cl b/OpenCL/inc_common.cl index f7d0f40fa..2d086c4b6 100644 --- a/OpenCL/inc_common.cl +++ b/OpenCL/inc_common.cl @@ -310,7 +310,11 @@ DECLSPEC u32x hc_rotl32 (const u32x a, const int n) #elif defined IS_CUDA return rotl32 (a, n); #else + #ifdef USE_ROTATE return rotate (a, make_u32x (n)); + #else + return ((a << n) | (a >> (32 - n))); + #endif #endif } @@ -321,7 +325,11 @@ DECLSPEC u32x hc_rotr32 (const u32x a, const int n) #elif defined IS_CUDA return rotr32 (a, n); #else + #ifdef USE_ROTATE return rotate (a, make_u32x (32 - n)); + #else + return ((a >> n) | (a << (32 - n))); + #endif #endif } @@ -332,7 +340,11 @@ DECLSPEC u32 hc_rotl32_S (const u32 a, const int n) #elif defined IS_CUDA return rotl32_S (a, n); #else + #ifdef USE_ROTATE return rotate (a, (u32) (n)); + #else + return ((a << n) | (a >> (32 - n))); + #endif #endif } @@ -343,7 +355,11 @@ DECLSPEC u32 hc_rotr32_S (const u32 a, const int n) #elif defined IS_CUDA return rotr32_S (a, n); #else + #ifdef USE_ROTATE return rotate (a, (u32) (32 - n)); + #else + return ((a >> n) | (a << (32 - n))); + #endif #endif } @@ -356,7 +372,11 @@ DECLSPEC u64x hc_rotl64 (const u64x a, const int n) #elif defined IS_AMD return rotl64 (a, n); #else + #ifdef USE_ROTATE return rotate (a, make_u64x (n)); + #else + return ((a << n) | (a >> (64 - n))); + #endif #endif } @@ -369,7 +389,11 @@ DECLSPEC u64x hc_rotr64 (const u64x a, const int n) #elif defined IS_AMD return rotr64 (a, n); #else + #ifdef USE_ROTATE return rotate (a, make_u64x (64 - n)); + #else + return ((a >> n) | (a << (64 - n))); + #endif #endif } @@ -382,7 +406,11 @@ DECLSPEC u64 hc_rotl64_S (const u64 a, const int n) #elif defined IS_AMD return rotl64_S (a, n); #else + #ifdef USE_ROTATE return rotate (a, (u64) (n)); + #else + return ((a << n) | (a >> (64 - n))); + #endif #endif } @@ -395,7 +423,11 @@ DECLSPEC u64 hc_rotr64_S (const u64 a, const int n) #elif defined IS_AMD return rotr64_S (a, n); #else + #ifdef USE_ROTATE return rotate (a, (u64) (64 - n)); + #else + return ((a >> n) | (a << (64 - n))); + #endif #endif } @@ -479,10 +511,20 @@ DECLSPEC u32x hc_swap32 (const u32x v) #endif #else + + #if defined USE_BITSELECT && defined USE_ROTATE r = bitselect (rotate (v, make_u32x (24)), rotate (v, make_u32x ( 8)), make_u32x (0x00ff00ff)); + #else + r = ((v & make_u32x (0xff000000)) >> 24) + | ((v & make_u32x (0x00ff0000)) >> 8) + | ((v & make_u32x (0x0000ff00)) << 8) + | ((v & make_u32x (0x000000ff)) << 24); #endif + + #endif + #endif return r; @@ -500,7 +542,14 @@ DECLSPEC u32 hc_swap32_S (const u32 v) #elif defined IS_NV && HAS_PRMT == 1 asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r) : "r"(v)); #else + #ifdef USE_SWIZZLE r = as_uint (as_uchar4 (v).s3210); + #else + r = ((v & 0xff000000) >> 24) + | ((v & 0x00ff0000) >> 8) + | ((v & 0x0000ff00) << 8) + | ((v & 0x000000ff) << 24); + #endif #endif #endif @@ -697,6 +746,9 @@ DECLSPEC u64x hc_swap64 (const u64x v) #endif #else + + #if defined USE_BITSELECT && defined USE_ROTATE + r = bitselect (bitselect (rotate (v, make_u64x (24)), rotate (v, make_u64x ( 8)), make_u64x (0x000000ff000000ff)), @@ -704,6 +756,19 @@ DECLSPEC u64x hc_swap64 (const u64x v) rotate (v, make_u64x (40)), make_u64x (0x00ff000000ff0000)), make_u64x (0xffff0000ffff0000)); + #else + + r = ((v & make_u64x (0xff00000000000000ULL)) >> 56) + | ((v & make_u64x (0x00ff000000000000ULL)) >> 40) + | ((v & make_u64x (0x0000ff0000000000ULL)) >> 24) + | ((v & make_u64x (0x000000ff00000000ULL)) >> 8) + | ((v & make_u64x (0x00000000ff000000ULL)) << 8) + | ((v & make_u64x (0x0000000000ff0000ULL)) << 24) + | ((v & make_u64x (0x000000000000ff00ULL)) << 40) + | ((v & make_u64x (0x00000000000000ffULL)) << 56); + + #endif + #endif #endif @@ -744,7 +809,18 @@ DECLSPEC u64 hc_swap64_S (const u64 v) asm volatile ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tr), "r"(tl)); #else + #ifdef USE_SWIZZLE r = as_ulong (as_uchar8 (v).s76543210); + #else + r = ((v & 0xff00000000000000ULL) >> 56) + | ((v & 0x00ff000000000000ULL) >> 40) + | ((v & 0x0000ff0000000000ULL) >> 24) + | ((v & 0x000000ff00000000ULL) >> 8) + | ((v & 0x00000000ff000000ULL) << 8) + | ((v & 0x0000000000ff0000ULL) << 24) + | ((v & 0x000000000000ff00ULL) << 40) + | ((v & 0x00000000000000ffULL) << 56); + #endif #endif #endif diff --git a/OpenCL/inc_hash_md4.h b/OpenCL/inc_hash_md4.h index 8dcebba55..7c3b31894 100644 --- a/OpenCL/inc_hash_md4.h +++ b/OpenCL/inc_hash_md4.h @@ -10,26 +10,14 @@ #define MD4_G_S(x,y,z) (((x) & (y)) | ((x) & (z)) | ((y) & (z))) #define MD4_H_S(x,y,z) ((x) ^ (y) ^ (z)) -#ifdef IS_NV #define MD4_F(x,y,z) (((x) & (y)) | ((~(x)) & (z))) #define MD4_G(x,y,z) (((x) & (y)) | ((x) & (z)) | ((y) & (z))) #define MD4_H(x,y,z) ((x) ^ (y) ^ (z)) + +#ifdef USE_BITSELECT #define MD4_Fo(x,y,z) (bitselect ((z), (y), (x))) #define MD4_Go(x,y,z) (bitselect ((x), (y), ((x) ^ (z)))) -#endif - -#ifdef IS_AMD -#define MD4_F(x,y,z) (((x) & (y)) | ((~(x)) & (z))) -#define MD4_G(x,y,z) (((x) & (y)) | ((x) & (z)) | ((y) & (z))) -#define MD4_H(x,y,z) ((x) ^ (y) ^ (z)) -#define MD4_Fo(x,y,z) (bitselect ((z), (y), (x))) -#define MD4_Go(x,y,z) (bitselect ((x), (y), ((x) ^ (z)))) -#endif - -#ifdef IS_GENERIC -#define MD4_F(x,y,z) (((x) & (y)) | ((~(x)) & (z))) -#define MD4_G(x,y,z) (((x) & (y)) | ((x) & (z)) | ((y) & (z))) -#define MD4_H(x,y,z) ((x) ^ (y) ^ (z)) +#else #define MD4_Fo(x,y,z) (MD4_F((x), (y), (z))) #define MD4_Go(x,y,z) (MD4_G((x), (y), (z))) #endif diff --git a/OpenCL/inc_hash_md5.h b/OpenCL/inc_hash_md5.h index f6b4a00b3..1e6eaaf93 100644 --- a/OpenCL/inc_hash_md5.h +++ b/OpenCL/inc_hash_md5.h @@ -11,35 +11,17 @@ #define MD5_H_S(x,y,z) ((x) ^ (y) ^ (z)) #define MD5_I_S(x,y,z) ((y) ^ ((x) | ~(z))) -#ifdef IS_NV #define MD5_F(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) #define MD5_G(x,y,z) ((y) ^ ((z) & ((x) ^ (y)))) #define MD5_H(x,y,z) ((x) ^ (y) ^ (z)) #define MD5_H1(x,y,z) ((t = (x) ^ (y)) ^ (z)) #define MD5_H2(x,y,z) ((x) ^ t) #define MD5_I(x,y,z) ((y) ^ ((x) | ~(z))) -#define MD5_Fo(x,y,z) (MD5_F((x), (y), (z))) -#define MD5_Go(x,y,z) (MD5_G((x), (y), (z))) -#endif -#ifdef IS_AMD -#define MD5_F(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) -#define MD5_G(x,y,z) ((y) ^ ((z) & ((x) ^ (y)))) -#define MD5_H(x,y,z) ((x) ^ (y) ^ (z)) -#define MD5_H1(x,y,z) ((t = (x) ^ (y)) ^ (z)) -#define MD5_H2(x,y,z) ((x) ^ t) -#define MD5_I(x,y,z) ((y) ^ ((x) | ~(z))) +#ifdef USE_BITSELECT #define MD5_Fo(x,y,z) (bitselect ((z), (y), (x))) #define MD5_Go(x,y,z) (bitselect ((y), (x), (z))) -#endif - -#ifdef IS_GENERIC -#define MD5_F(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) -#define MD5_G(x,y,z) ((y) ^ ((z) & ((x) ^ (y)))) -#define MD5_H(x,y,z) ((x) ^ (y) ^ (z)) -#define MD5_H1(x,y,z) ((t = (x) ^ (y)) ^ (z)) -#define MD5_H2(x,y,z) ((x) ^ t) -#define MD5_I(x,y,z) ((y) ^ ((x) | ~(z))) +#else #define MD5_Fo(x,y,z) (MD5_F((x), (y), (z))) #define MD5_Go(x,y,z) (MD5_G((x), (y), (z))) #endif diff --git a/OpenCL/inc_hash_ripemd160.h b/OpenCL/inc_hash_ripemd160.h index 982f4fdb2..25a69ed56 100644 --- a/OpenCL/inc_hash_ripemd160.h +++ b/OpenCL/inc_hash_ripemd160.h @@ -6,32 +6,16 @@ #ifndef _INC_HASH_RIPEMD160_H #define _INC_HASH_RIPEMD160_H -#ifdef IS_NV #define RIPEMD160_F(x,y,z) ((x) ^ (y) ^ (z)) #define RIPEMD160_G(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) /* x ? y : z */ #define RIPEMD160_H(x,y,z) (((x) | ~(y)) ^ (z)) #define RIPEMD160_I(x,y,z) ((y) ^ ((z) & ((x) ^ (y)))) /* z ? x : y */ #define RIPEMD160_J(x,y,z) ((x) ^ ((y) | ~(z))) + +#ifdef USE_BITSELECT #define RIPEMD160_Go(x,y,z) (bitselect ((z), (y), (x))) #define RIPEMD160_Io(x,y,z) (bitselect ((y), (x), (z))) -#endif - -#ifdef IS_AMD -#define RIPEMD160_F(x,y,z) ((x) ^ (y) ^ (z)) -#define RIPEMD160_G(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) /* x ? y : z */ -#define RIPEMD160_H(x,y,z) (((x) | ~(y)) ^ (z)) -#define RIPEMD160_I(x,y,z) ((y) ^ ((z) & ((x) ^ (y)))) /* z ? x : y */ -#define RIPEMD160_J(x,y,z) ((x) ^ ((y) | ~(z))) -#define RIPEMD160_Go(x,y,z) (bitselect ((z), (y), (x))) -#define RIPEMD160_Io(x,y,z) (bitselect ((y), (x), (z))) -#endif - -#ifdef IS_GENERIC -#define RIPEMD160_F(x,y,z) ((x) ^ (y) ^ (z)) -#define RIPEMD160_G(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) /* x ? y : z */ -#define RIPEMD160_H(x,y,z) (((x) | ~(y)) ^ (z)) -#define RIPEMD160_I(x,y,z) ((y) ^ ((z) & ((x) ^ (y)))) /* z ? x : y */ -#define RIPEMD160_J(x,y,z) ((x) ^ ((y) | ~(z))) +#else #define RIPEMD160_Go(x,y,z) (RIPEMD160_G ((x), (y), (z))) #define RIPEMD160_Io(x,y,z) (RIPEMD160_I ((x), (y), (z))) #endif diff --git a/OpenCL/inc_hash_sha1.h b/OpenCL/inc_hash_sha1.h index 055516cb8..2ff36fdad 100644 --- a/OpenCL/inc_hash_sha1.h +++ b/OpenCL/inc_hash_sha1.h @@ -6,26 +6,14 @@ #ifndef _INC_HASH_SHA1_H #define _INC_HASH_SHA1_H -#ifdef IS_NV #define SHA1_F0(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) #define SHA1_F1(x,y,z) ((x) ^ (y) ^ (z)) #define SHA1_F2(x,y,z) (((x) & (y)) | ((z) & ((x) ^ (y)))) + +#ifdef USE_BITSELECT #define SHA1_F0o(x,y,z) (bitselect ((z), (y), (x))) #define SHA1_F2o(x,y,z) (bitselect ((x), (y), ((x) ^ (z)))) -#endif - -#ifdef IS_AMD -#define SHA1_F0(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) -#define SHA1_F1(x,y,z) ((x) ^ (y) ^ (z)) -#define SHA1_F2(x,y,z) (((x) & (y)) | ((z) & ((x) ^ (y)))) -#define SHA1_F0o(x,y,z) (bitselect ((z), (y), (x))) -#define SHA1_F2o(x,y,z) (bitselect ((x), (y), ((x) ^ (z)))) -#endif - -#ifdef IS_GENERIC -#define SHA1_F0(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) -#define SHA1_F1(x,y,z) ((x) ^ (y) ^ (z)) -#define SHA1_F2(x,y,z) (((x) & (y)) | ((z) & ((x) ^ (y)))) +#else #define SHA1_F0o(x,y,z) (SHA1_F0 ((x), (y), (z))) #define SHA1_F2o(x,y,z) (SHA1_F2 ((x), (y), (z))) #endif diff --git a/OpenCL/inc_hash_sha224.h b/OpenCL/inc_hash_sha224.h index 6f243f7b4..d68c79d65 100644 --- a/OpenCL/inc_hash_sha224.h +++ b/OpenCL/inc_hash_sha224.h @@ -18,23 +18,13 @@ #define SHA224_S2(x) (hc_rotl32 ((x), 30u) ^ hc_rotl32 ((x), 19u) ^ hc_rotl32 ((x), 10u)) #define SHA224_S3(x) (hc_rotl32 ((x), 26u) ^ hc_rotl32 ((x), 21u) ^ hc_rotl32 ((x), 7u)) -#ifdef IS_NV #define SHA224_F0(x,y,z) (((x) & (y)) | ((z) & ((x) ^ (y)))) #define SHA224_F1(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) + +#ifdef USE_BITSELECT #define SHA224_F0o(x,y,z) (bitselect ((x), (y), ((x) ^ (z)))) #define SHA224_F1o(x,y,z) (bitselect ((z), (y), (x))) -#endif - -#ifdef IS_AMD -#define SHA224_F0(x,y,z) (((x) & (y)) | ((z) & ((x) ^ (y)))) -#define SHA224_F1(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) -#define SHA224_F0o(x,y,z) (bitselect ((x), (y), ((x) ^ (z)))) -#define SHA224_F1o(x,y,z) (bitselect ((z), (y), (x))) -#endif - -#ifdef IS_GENERIC -#define SHA224_F0(x,y,z) (((x) & (y)) | ((z) & ((x) ^ (y)))) -#define SHA224_F1(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) +#else #define SHA224_F0o(x,y,z) (SHA224_F0 ((x), (y), (z))) #define SHA224_F1o(x,y,z) (SHA224_F1 ((x), (y), (z))) #endif diff --git a/OpenCL/inc_hash_sha256.h b/OpenCL/inc_hash_sha256.h index 89421be23..ccf5a79f8 100644 --- a/OpenCL/inc_hash_sha256.h +++ b/OpenCL/inc_hash_sha256.h @@ -18,23 +18,13 @@ #define SHA256_S2(x) (hc_rotl32 ((x), 30u) ^ hc_rotl32 ((x), 19u) ^ hc_rotl32 ((x), 10u)) #define SHA256_S3(x) (hc_rotl32 ((x), 26u) ^ hc_rotl32 ((x), 21u) ^ hc_rotl32 ((x), 7u)) -#ifdef IS_NV #define SHA256_F0(x,y,z) (((x) & (y)) | ((z) & ((x) ^ (y)))) #define SHA256_F1(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) + +#ifdef USE_BITSELECT #define SHA256_F0o(x,y,z) (bitselect ((x), (y), ((x) ^ (z)))) #define SHA256_F1o(x,y,z) (bitselect ((z), (y), (x))) -#endif - -#ifdef IS_AMD -#define SHA256_F0(x,y,z) (((x) & (y)) | ((z) & ((x) ^ (y)))) -#define SHA256_F1(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) -#define SHA256_F0o(x,y,z) (bitselect ((x), (y), ((x) ^ (z)))) -#define SHA256_F1o(x,y,z) (bitselect ((z), (y), (x))) -#endif - -#ifdef IS_GENERIC -#define SHA256_F0(x,y,z) (((x) & (y)) | ((z) & ((x) ^ (y)))) -#define SHA256_F1(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) +#else #define SHA256_F0o(x,y,z) (SHA256_F0 ((x), (y), (z))) #define SHA256_F1o(x,y,z) (SHA256_F1 ((x), (y), (z))) #endif diff --git a/OpenCL/inc_hash_sha384.h b/OpenCL/inc_hash_sha384.h index e19c9ec9a..92266b24a 100644 --- a/OpenCL/inc_hash_sha384.h +++ b/OpenCL/inc_hash_sha384.h @@ -21,17 +21,10 @@ #define SHA384_F0(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) #define SHA384_F1(x,y,z) (((x) & (y)) | ((z) & ((x) ^ (y)))) -#ifdef IS_NV +#ifdef USE_BITSELECT #define SHA384_F0o(x,y,z) (bitselect ((z), (y), (x))) #define SHA384_F1o(x,y,z) (bitselect ((x), (y), ((x) ^ (z)))) -#endif - -#ifdef IS_AMD -#define SHA384_F0o(x,y,z) (bitselect ((z), (y), (x))) -#define SHA384_F1o(x,y,z) (bitselect ((x), (y), ((x) ^ (z)))) -#endif - -#ifdef IS_GENERIC +#else #define SHA384_F0o(x,y,z) (SHA384_F0 ((x), (y), (z))) #define SHA384_F1o(x,y,z) (SHA384_F1 ((x), (y), (z))) #endif diff --git a/OpenCL/inc_hash_sha512.h b/OpenCL/inc_hash_sha512.h index f30dc2b86..c66aa1fb9 100644 --- a/OpenCL/inc_hash_sha512.h +++ b/OpenCL/inc_hash_sha512.h @@ -21,17 +21,10 @@ #define SHA512_F0(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) #define SHA512_F1(x,y,z) (((x) & (y)) | ((z) & ((x) ^ (y)))) -#ifdef IS_NV +#ifdef USE_BITSELECT #define SHA512_F0o(x,y,z) (bitselect ((z), (y), (x))) #define SHA512_F1o(x,y,z) (bitselect ((x), (y), ((x) ^ (z)))) -#endif - -#ifdef IS_AMD -#define SHA512_F0o(x,y,z) (bitselect ((z), (y), (x))) -#define SHA512_F1o(x,y,z) (bitselect ((x), (y), ((x) ^ (z)))) -#endif - -#ifdef IS_GENERIC +#else #define SHA512_F0o(x,y,z) (SHA512_F0 ((x), (y), (z))) #define SHA512_F1o(x,y,z) (SHA512_F1 ((x), (y), (z))) #endif diff --git a/OpenCL/inc_vendor.h b/OpenCL/inc_vendor.h index f2f201e19..b1a656ce8 100644 --- a/OpenCL/inc_vendor.h +++ b/OpenCL/inc_vendor.h @@ -144,4 +144,13 @@ #undef _unroll #endif +// Whitelist some OpenCL specific functions +// This could create more stable kernels on systems with bad OpenCL drivers + +#ifdef IS_NV +#define USE_BITSELECT +#define USE_ROTATE +#define USE_SWIZZLE +#endif + #endif