diff --git a/OpenCL/m10100_a0-optimized.cl b/OpenCL/m10100_a0-optimized.cl
index 42c6f939e..946c47f71 100644
--- a/OpenCL/m10100_a0-optimized.cl
+++ b/OpenCL/m10100_a0-optimized.cl
@@ -16,11 +16,64 @@
 #include M2S(INCLUDE_PATH/inc_simd.cl)
 #endif
 
+DECLSPEC u64 siphash_rot32_S (const u64 a) // scalar helper: returns a rebuilt with its v32.a / v32.b parts exchanged
+{
+  vconv64_t in; // NOTE(review): assumes vconv64_t overlays v64 with the u32 pair v32.a/v32.b - confirm against its declaration
+
+  in.v64 = a;
+
+  vconv64_t out;
+
+  out.v32.a = in.v32.b; // cross-assign the two parts; both members of out.v32 are written before out.v64 is read
+  out.v32.b = in.v32.a;
+
+  return out.v64;
+}
+
+DECLSPEC u64x siphash_rot32 (const u64x a) // vector form: runs siphash_rot32_S on each component selected by VECT_SIZE
+{
+  u64x r; // filled component by component by the VECT_SIZE blocks below
+
+  #if VECT_SIZE == 1 // here a is handed to the scalar helper as-is
+  r = siphash_rot32_S (a);
+  #endif
+
+  #if VECT_SIZE >= 2 // the >= guards stack: a wider VECT_SIZE also compiles every narrower block
+  r.s0 = siphash_rot32_S (a.s0);
+  r.s1 = siphash_rot32_S (a.s1);
+  #endif
+
+  #if VECT_SIZE >= 4
+  r.s2 = siphash_rot32_S (a.s2);
+  r.s3 = siphash_rot32_S (a.s3);
+  #endif
+
+  #if VECT_SIZE >= 8
+  r.s4 = siphash_rot32_S (a.s4);
+  r.s5 = siphash_rot32_S (a.s5);
+  r.s6 = siphash_rot32_S (a.s6);
+  r.s7 = siphash_rot32_S (a.s7);
+  #endif
+
+  #if VECT_SIZE >= 16
+  r.s8 = siphash_rot32_S (a.s8);
+  r.s9 = siphash_rot32_S (a.s9);
+  r.sa = siphash_rot32_S (a.sa);
+  r.sb = siphash_rot32_S (a.sb);
+  r.sc = siphash_rot32_S (a.sc);
+  r.sd = siphash_rot32_S (a.sd);
+  r.se = siphash_rot32_S (a.se);
+  r.sf = siphash_rot32_S (a.sf);
+  #endif
+
+  return r;
+}
+
 #define SIPROUND(v0,v1,v2,v3) \
   (v0) += (v1); \
   (v1) = hc_rotl64 ((v1), 13); \
   (v1) ^= (v0); \
-  (v0) = hc_rotl64 ((v0), 32); \
+  (v0) = siphash_rot32 ((v0)); \
   (v2) += (v3); \
   (v3) = hc_rotl64 ((v3), 16); \
   (v3) ^= (v2); \
@@ -30,7 +83,7 @@
   (v2) += (v1); \
   (v1) = hc_rotl64 ((v1), 17); \
   (v1) ^= (v2); \
-  (v2) = hc_rotl64 ((v2), 32)
+  (v2) = siphash_rot32 ((v2))
 
 KERNEL_FQ void m10100_m04 (KERN_ATTR_RULES ())
 {
diff --git a/OpenCL/m10100_a1-optimized.cl b/OpenCL/m10100_a1-optimized.cl
index 00b469ee6..ca7e5b63d 100644
--- a/OpenCL/m10100_a1-optimized.cl
+++ b/OpenCL/m10100_a1-optimized.cl
@@ -14,11 +14,64 @@
 #include M2S(INCLUDE_PATH/inc_simd.cl)
 #endif
 
+DECLSPEC u64 siphash_rot32_S (const u64 a) // scalar helper: returns a rebuilt with its v32.a / v32.b parts exchanged
+{
+  vconv64_t in; // NOTE(review): assumes vconv64_t overlays v64 with the u32 pair v32.a/v32.b - confirm against its declaration
+
+  in.v64 = a;
+
+  vconv64_t out;
+
+  out.v32.a = in.v32.b; // cross-assign the two parts; both members of out.v32 are written before out.v64 is read
+  out.v32.b = in.v32.a;
+
+  return out.v64;
+}
+
+DECLSPEC u64x siphash_rot32 (const u64x a) // vector form: runs siphash_rot32_S on each component selected by VECT_SIZE
+{
+  u64x r; // filled component by component by the VECT_SIZE blocks below
+
+  #if VECT_SIZE == 1 // here a is handed to the scalar helper as-is
+  r = siphash_rot32_S (a);
+  #endif
+
+  #if VECT_SIZE >= 2 // the >= guards stack: a wider VECT_SIZE also compiles every narrower block
+  r.s0 = siphash_rot32_S (a.s0);
+  r.s1 = siphash_rot32_S (a.s1);
+  #endif
+
+  #if VECT_SIZE >= 4
+  r.s2 = siphash_rot32_S (a.s2);
+  r.s3 = siphash_rot32_S (a.s3);
+  #endif
+
+  #if VECT_SIZE >= 8
+  r.s4 = siphash_rot32_S (a.s4);
+  r.s5 = siphash_rot32_S (a.s5);
+  r.s6 = siphash_rot32_S (a.s6);
+  r.s7 = siphash_rot32_S (a.s7);
+  #endif
+
+  #if VECT_SIZE >= 16
+  r.s8 = siphash_rot32_S (a.s8);
+  r.s9 = siphash_rot32_S (a.s9);
+  r.sa = siphash_rot32_S (a.sa);
+  r.sb = siphash_rot32_S (a.sb);
+  r.sc = siphash_rot32_S (a.sc);
+  r.sd = siphash_rot32_S (a.sd);
+  r.se = siphash_rot32_S (a.se);
+  r.sf = siphash_rot32_S (a.sf);
+  #endif
+
+  return r;
+}
+
 #define SIPROUND(v0,v1,v2,v3) \
   (v0) += (v1); \
   (v1) = hc_rotl64 ((v1), 13); \
   (v1) ^= (v0); \
-  (v0) = hc_rotl64 ((v0), 32); \
+  (v0) = siphash_rot32 ((v0)); \
   (v2) += (v3); \
   (v3) = hc_rotl64 ((v3), 16); \
   (v3) ^= (v2); \
@@ -28,7 +81,7 @@
   (v2) += (v1); \
   (v1) = hc_rotl64 ((v1), 17); \
   (v1) ^= (v2); \
-  (v2) = hc_rotl64 ((v2), 32)
+  (v2) = siphash_rot32 ((v2))
 
 KERNEL_FQ void m10100_m04 (KERN_ATTR_BASIC ())
 {
diff --git a/OpenCL/m10100_a3-optimized.cl b/OpenCL/m10100_a3-optimized.cl
index 219b0c444..26c1e036f 100644
--- a/OpenCL/m10100_a3-optimized.cl
+++ b/OpenCL/m10100_a3-optimized.cl
@@ -13,11 +13,64 @@
 #include M2S(INCLUDE_PATH/inc_simd.cl)
 #endif
 
+DECLSPEC u64 siphash_rot32_S (const u64 a) // scalar helper: returns a rebuilt with its v32.a / v32.b parts exchanged
+{
+  vconv64_t in; // NOTE(review): assumes vconv64_t overlays v64 with the u32 pair v32.a/v32.b - confirm against its declaration
+
+  in.v64 = a;
+
+  vconv64_t out;
+
+  out.v32.a = in.v32.b; // cross-assign the two parts; both members of out.v32 are written before out.v64 is read
+  out.v32.b = in.v32.a;
+
+  return out.v64;
+}
+
+DECLSPEC u64x siphash_rot32 (const u64x a) // vector form: runs siphash_rot32_S on each component selected by VECT_SIZE
+{
+  u64x r; // filled component by component by the VECT_SIZE blocks below
+
+  #if VECT_SIZE == 1 // here a is handed to the scalar helper as-is
+  r = siphash_rot32_S (a);
+  #endif
+
+  #if VECT_SIZE >= 2 // the >= guards stack: a wider VECT_SIZE also compiles every narrower block
+  r.s0 = siphash_rot32_S (a.s0);
+  r.s1 = siphash_rot32_S (a.s1);
+  #endif
+
+  #if VECT_SIZE >= 4
+  r.s2 = siphash_rot32_S (a.s2);
+  r.s3 = siphash_rot32_S (a.s3);
+  #endif
+
+  #if VECT_SIZE >= 8
+  r.s4 = siphash_rot32_S (a.s4);
+  r.s5 = siphash_rot32_S (a.s5);
+  r.s6 = siphash_rot32_S (a.s6);
+  r.s7 = siphash_rot32_S (a.s7);
+  #endif
+
+  #if VECT_SIZE >= 16
+  r.s8 = siphash_rot32_S (a.s8);
+  r.s9 = siphash_rot32_S (a.s9);
+  r.sa = siphash_rot32_S (a.sa);
+  r.sb = siphash_rot32_S (a.sb);
+  r.sc = siphash_rot32_S (a.sc);
+  r.sd = siphash_rot32_S (a.sd);
+  r.se = siphash_rot32_S (a.se);
+  r.sf = siphash_rot32_S (a.sf);
+  #endif
+
+  return r;
+}
+
 #define SIPROUND(v0,v1,v2,v3) \
   (v0) += (v1); \
   (v1) = hc_rotl64 ((v1), 13); \
   (v1) ^= (v0); \
-  (v0) = hc_rotl64 ((v0), 32); \
+  (v0) = siphash_rot32 ((v0)); \
   (v2) += (v3); \
   (v3) = hc_rotl64 ((v3), 16); \
   (v3) ^= (v2); \
@@ -27,7 +80,7 @@
   (v2) += (v1); \
   (v1) = hc_rotl64 ((v1), 17); \
   (v1) ^= (v2); \
-  (v2) = hc_rotl64 ((v2), 32)
+  (v2) = siphash_rot32 ((v2))
 
 DECLSPEC void m10100m (PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTOR ())
 {