From 525f8af200e4334dfa1147e0103a80fcdd8df95c Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Mon, 3 Feb 2020 15:51:08 +0100 Subject: [PATCH] Add v8x_from_v64_x to inc_common.cl --- OpenCL/inc_common.cl | 384 +++++++++++++++++++++++++++++++++++ OpenCL/inc_common.h | 18 ++ OpenCL/inc_hash_whirlpool.cl | 64 +++--- tools/benchmark_deep.pl | 2 +- 4 files changed, 435 insertions(+), 33 deletions(-) diff --git a/OpenCL/inc_common.cl b/OpenCL/inc_common.cl index a6dfc71c4..e407e02c2 100644 --- a/OpenCL/inc_common.cl +++ b/OpenCL/inc_common.cl @@ -48,6 +48,390 @@ DECLSPEC u8 v8d_from_v32_S (const u32 v32) return v.v8.d; } +DECLSPEC u8 v8a_from_v64_S (const u64 v64) +{ + vconv64_t v; + + v.v64 = v64; + + return v.v8.a; +} + +DECLSPEC u8 v8b_from_v64_S (const u64 v64) +{ + vconv64_t v; + + v.v64 = v64; + + return v.v8.b; +} + +DECLSPEC u8 v8c_from_v64_S (const u64 v64) +{ + vconv64_t v; + + v.v64 = v64; + + return v.v8.c; +} + +DECLSPEC u8 v8d_from_v64_S (const u64 v64) +{ + vconv64_t v; + + v.v64 = v64; + + return v.v8.d; +} + +DECLSPEC u8 v8e_from_v64_S (const u64 v64) +{ + vconv64_t v; + + v.v64 = v64; + + return v.v8.e; +} + +DECLSPEC u8 v8f_from_v64_S (const u64 v64) +{ + vconv64_t v; + + v.v64 = v64; + + return v.v8.f; +} + +DECLSPEC u8 v8g_from_v64_S (const u64 v64) +{ + vconv64_t v; + + v.v64 = v64; + + return v.v8.g; +} + +DECLSPEC u8 v8h_from_v64_S (const u64 v64) +{ + vconv64_t v; + + v.v64 = v64; + + return v.v8.h; +} + +DECLSPEC u8x v8a_from_v64 (u64x a) +{ + u8x r = 0; + + #if VECT_SIZE == 1 + r = v8a_from_v64_S (a); + #endif + + #if VECT_SIZE >= 2 + r.s0 = v8a_from_v64_S (a.s0); + r.s1 = v8a_from_v64_S (a.s1); + #endif + + #if VECT_SIZE >= 4 + r.s2 = v8a_from_v64_S (a.s2); + r.s3 = v8a_from_v64_S (a.s3); + #endif + + #if VECT_SIZE >= 8 + r.s4 = v8a_from_v64_S (a.s4); + r.s5 = v8a_from_v64_S (a.s5); + r.s6 = v8a_from_v64_S (a.s6); + r.s7 = v8a_from_v64_S (a.s7); + #endif + + #if VECT_SIZE >= 16 + r.s8 = v8a_from_v64_S (a.s8); + r.s9 = v8a_from_v64_S (a.s9); + r.sa = v8a_from_v64_S (a.sa); + r.sb = v8a_from_v64_S (a.sb); + r.sc = v8a_from_v64_S (a.sc); + r.sd = v8a_from_v64_S (a.sd); + r.se = v8a_from_v64_S (a.se); + r.sf = v8a_from_v64_S (a.sf); + #endif + + return r; +} + +DECLSPEC u8x v8b_from_v64 (u64x a) +{ + u8x r = 0; + + #if VECT_SIZE == 1 + r = v8b_from_v64_S (a); + #endif + + #if VECT_SIZE >= 2 + r.s0 = v8b_from_v64_S (a.s0); + r.s1 = v8b_from_v64_S (a.s1); + #endif + + #if VECT_SIZE >= 4 + r.s2 = v8b_from_v64_S (a.s2); + r.s3 = v8b_from_v64_S (a.s3); + #endif + + #if VECT_SIZE >= 8 + r.s4 = v8b_from_v64_S (a.s4); + r.s5 = v8b_from_v64_S (a.s5); + r.s6 = v8b_from_v64_S (a.s6); + r.s7 = v8b_from_v64_S (a.s7); + #endif + + #if VECT_SIZE >= 16 + r.s8 = v8b_from_v64_S (a.s8); + r.s9 = v8b_from_v64_S (a.s9); + r.sa = v8b_from_v64_S (a.sa); + r.sb = v8b_from_v64_S (a.sb); + r.sc = v8b_from_v64_S (a.sc); + r.sd = v8b_from_v64_S (a.sd); + r.se = v8b_from_v64_S (a.se); + r.sf = v8b_from_v64_S (a.sf); + #endif + + return r; +} + +DECLSPEC u8x v8c_from_v64 (u64x a) +{ + u8x r = 0; + + #if VECT_SIZE == 1 + r = v8c_from_v64_S (a); + #endif + + #if VECT_SIZE >= 2 + r.s0 = v8c_from_v64_S (a.s0); + r.s1 = v8c_from_v64_S (a.s1); + #endif + + #if VECT_SIZE >= 4 + r.s2 = v8c_from_v64_S (a.s2); + r.s3 = v8c_from_v64_S (a.s3); + #endif + + #if VECT_SIZE >= 8 + r.s4 = v8c_from_v64_S (a.s4); + r.s5 = v8c_from_v64_S (a.s5); + r.s6 = v8c_from_v64_S (a.s6); + r.s7 = v8c_from_v64_S (a.s7); + #endif + + #if VECT_SIZE >= 16 + r.s8 = v8c_from_v64_S (a.s8); + r.s9 = v8c_from_v64_S (a.s9); + r.sa = v8c_from_v64_S (a.sa); + r.sb = v8c_from_v64_S (a.sb); + r.sc = v8c_from_v64_S (a.sc); + r.sd = v8c_from_v64_S (a.sd); + r.se = v8c_from_v64_S (a.se); + r.sf = v8c_from_v64_S (a.sf); + #endif + + return r; +} + +DECLSPEC u8x v8d_from_v64 (u64x a) +{ + u8x r = 0; + + #if VECT_SIZE == 1 + r = v8d_from_v64_S (a); + #endif + + #if VECT_SIZE >= 2 + r.s0 = v8d_from_v64_S (a.s0); + r.s1 = v8d_from_v64_S (a.s1); + #endif + + #if VECT_SIZE >= 4 + r.s2 = v8d_from_v64_S (a.s2); + r.s3 = v8d_from_v64_S (a.s3); + #endif + + #if VECT_SIZE >= 8 + r.s4 = v8d_from_v64_S (a.s4); + r.s5 = v8d_from_v64_S (a.s5); + r.s6 = v8d_from_v64_S (a.s6); + r.s7 = v8d_from_v64_S (a.s7); + #endif + + #if VECT_SIZE >= 16 + r.s8 = v8d_from_v64_S (a.s8); + r.s9 = v8d_from_v64_S (a.s9); + r.sa = v8d_from_v64_S (a.sa); + r.sb = v8d_from_v64_S (a.sb); + r.sc = v8d_from_v64_S (a.sc); + r.sd = v8d_from_v64_S (a.sd); + r.se = v8d_from_v64_S (a.se); + r.sf = v8d_from_v64_S (a.sf); + #endif + + return r; +} + +DECLSPEC u8x v8e_from_v64 (u64x a) +{ + u8x r = 0; + + #if VECT_SIZE == 1 + r = v8e_from_v64_S (a); + #endif + + #if VECT_SIZE >= 2 + r.s0 = v8e_from_v64_S (a.s0); + r.s1 = v8e_from_v64_S (a.s1); + #endif + + #if VECT_SIZE >= 4 + r.s2 = v8e_from_v64_S (a.s2); + r.s3 = v8e_from_v64_S (a.s3); + #endif + + #if VECT_SIZE >= 8 + r.s4 = v8e_from_v64_S (a.s4); + r.s5 = v8e_from_v64_S (a.s5); + r.s6 = v8e_from_v64_S (a.s6); + r.s7 = v8e_from_v64_S (a.s7); + #endif + + #if VECT_SIZE >= 16 + r.s8 = v8e_from_v64_S (a.s8); + r.s9 = v8e_from_v64_S (a.s9); + r.sa = v8e_from_v64_S (a.sa); + r.sb = v8e_from_v64_S (a.sb); + r.sc = v8e_from_v64_S (a.sc); + r.sd = v8e_from_v64_S (a.sd); + r.se = v8e_from_v64_S (a.se); + r.sf = v8e_from_v64_S (a.sf); + #endif + + return r; +} + +DECLSPEC u8x v8f_from_v64 (u64x a) +{ + u8x r = 0; + + #if VECT_SIZE == 1 + r = v8f_from_v64_S (a); + #endif + + #if VECT_SIZE >= 2 + r.s0 = v8f_from_v64_S (a.s0); + r.s1 = v8f_from_v64_S (a.s1); + #endif + + #if VECT_SIZE >= 4 + r.s2 = v8f_from_v64_S (a.s2); + r.s3 = v8f_from_v64_S (a.s3); + #endif + + #if VECT_SIZE >= 8 + r.s4 = v8f_from_v64_S (a.s4); + r.s5 = v8f_from_v64_S (a.s5); + r.s6 = v8f_from_v64_S (a.s6); + r.s7 = v8f_from_v64_S (a.s7); + #endif + + #if VECT_SIZE >= 16 + r.s8 = v8f_from_v64_S (a.s8); + r.s9 = v8f_from_v64_S (a.s9); + r.sa = v8f_from_v64_S (a.sa); + r.sb = v8f_from_v64_S (a.sb); + r.sc = v8f_from_v64_S (a.sc); + r.sd = v8f_from_v64_S (a.sd); + r.se = v8f_from_v64_S (a.se); + r.sf = v8f_from_v64_S (a.sf); + #endif + + return r; +} + +DECLSPEC u8x v8g_from_v64 (u64x a) +{ + u8x r = 0; + + #if VECT_SIZE == 1 + r = v8g_from_v64_S (a); + #endif + + #if VECT_SIZE >= 2 + r.s0 = v8g_from_v64_S (a.s0); + r.s1 = v8g_from_v64_S (a.s1); + #endif + + #if VECT_SIZE >= 4 + r.s2 = v8g_from_v64_S (a.s2); + r.s3 = v8g_from_v64_S (a.s3); + #endif + + #if VECT_SIZE >= 8 + r.s4 = v8g_from_v64_S (a.s4); + r.s5 = v8g_from_v64_S (a.s5); + r.s6 = v8g_from_v64_S (a.s6); + r.s7 = v8g_from_v64_S (a.s7); + #endif + + #if VECT_SIZE >= 16 + r.s8 = v8g_from_v64_S (a.s8); + r.s9 = v8g_from_v64_S (a.s9); + r.sa = v8g_from_v64_S (a.sa); + r.sb = v8g_from_v64_S (a.sb); + r.sc = v8g_from_v64_S (a.sc); + r.sd = v8g_from_v64_S (a.sd); + r.se = v8g_from_v64_S (a.se); + r.sf = v8g_from_v64_S (a.sf); + #endif + + return r; +} + +DECLSPEC u8x v8h_from_v64 (u64x a) +{ + u8x r = 0; + + #if VECT_SIZE == 1 + r = v8h_from_v64_S (a); + #endif + + #if VECT_SIZE >= 2 + r.s0 = v8h_from_v64_S (a.s0); + r.s1 = v8h_from_v64_S (a.s1); + #endif + + #if VECT_SIZE >= 4 + r.s2 = v8h_from_v64_S (a.s2); + r.s3 = v8h_from_v64_S (a.s3); + #endif + + #if VECT_SIZE >= 8 + r.s4 = v8h_from_v64_S (a.s4); + r.s5 = v8h_from_v64_S (a.s5); + r.s6 = v8h_from_v64_S (a.s6); + r.s7 = v8h_from_v64_S (a.s7); + #endif + + #if VECT_SIZE >= 16 + r.s8 = v8h_from_v64_S (a.s8); + r.s9 = v8h_from_v64_S (a.s9); + r.sa = v8h_from_v64_S (a.sa); + r.sb = v8h_from_v64_S (a.sb); + r.sc = v8h_from_v64_S (a.sc); + r.sd = v8h_from_v64_S (a.sd); + r.se = v8h_from_v64_S (a.se); + r.sf = v8h_from_v64_S (a.sf); + #endif + + return r; +} + DECLSPEC u16 v16a_from_v32_S (const u32 v32) { vconv32_t v; diff --git a/OpenCL/inc_common.h b/OpenCL/inc_common.h index 7119ccf5e..8715ae75e 100644 --- a/OpenCL/inc_common.h +++ b/OpenCL/inc_common.h @@ -142,6 +142,24 @@ DECLSPEC u8 v8b_from_v32_S (const u32 v32); DECLSPEC u8 v8c_from_v32_S (const u32 v32); DECLSPEC u8 v8d_from_v32_S (const u32 v32); +DECLSPEC u8 v8a_from_v64_S (const u64 v64); +DECLSPEC u8 v8b_from_v64_S (const u64 v64); +DECLSPEC u8 v8c_from_v64_S (const u64 v64); +DECLSPEC u8 v8d_from_v64_S (const u64 v64); +DECLSPEC u8 v8e_from_v64_S (const u64 v64); +DECLSPEC u8 v8f_from_v64_S (const u64 v64); +DECLSPEC u8 v8g_from_v64_S (const u64 v64); +DECLSPEC u8 v8h_from_v64_S (const u64 v64); + +DECLSPEC u8x v8a_from_v64 (const u64x v64); +DECLSPEC u8x v8b_from_v64 (const u64x v64); +DECLSPEC u8x v8c_from_v64 (const u64x v64); +DECLSPEC u8x v8d_from_v64 (const u64x v64); +DECLSPEC u8x v8e_from_v64 (const u64x v64); +DECLSPEC u8x v8f_from_v64 (const u64x v64); +DECLSPEC u8x v8g_from_v64 (const u64x v64); +DECLSPEC u8x v8h_from_v64 (const u64x v64); + DECLSPEC u16 v16a_from_v32_S (const u32 v32); DECLSPEC u16 v16b_from_v32_S (const u32 v32); diff --git a/OpenCL/inc_hash_whirlpool.cl b/OpenCL/inc_hash_whirlpool.cl index e77734a4e..2cd08dd91 100644 --- a/OpenCL/inc_hash_whirlpool.cl +++ b/OpenCL/inc_hash_whirlpool.cl @@ -615,14 +615,14 @@ DECLSPEC void whirlpool_transform (const u32 *w0, const u32 *w1, const u32 *w2, #endif for (int i = 0; i < 8; i++) { - const u8 Lp0 = K[(i + 8) & 7] >> 56; - const u8 Lp1 = K[(i + 7) & 7] >> 48; - const u8 Lp2 = K[(i + 6) & 7] >> 40; - const u8 Lp3 = K[(i + 5) & 7] >> 32; - const u8 Lp4 = K[(i + 4) & 7] >> 24; - const u8 Lp5 = K[(i + 3) & 7] >> 16; - const u8 Lp6 = K[(i + 2) & 7] >> 8; - const u8 Lp7 = K[(i + 1) & 7] >> 0; + const u8 Lp0 = v8h_from_v64_S (K[(i + 8) & 7]); + const u8 Lp1 = v8g_from_v64_S (K[(i + 7) & 7]); + const u8 Lp2 = v8f_from_v64_S (K[(i + 6) & 7]); + const u8 Lp3 = v8e_from_v64_S (K[(i + 5) & 7]); + const u8 Lp4 = v8d_from_v64_S (K[(i + 4) & 7]); + const u8 Lp5 = v8c_from_v64_S (K[(i + 3) & 7]); + const u8 Lp6 = v8b_from_v64_S (K[(i + 2) & 7]); + const u8 Lp7 = v8a_from_v64_S (K[(i + 1) & 7]); const u64 X0 = BOX64_S (s_MT, 0, Lp0); const u64 X1 = BOX64_S (s_MT, 1, Lp1); @@ -659,14 +659,14 @@ DECLSPEC void whirlpool_transform (const u32 *w0, const u32 *w1, const u32 *w2, #endif for (int i = 0; i < 8; i++) { - const u8 Lp0 = state[(i + 8) & 7] >> 56; - const u8 Lp1 = state[(i + 7) & 7] >> 48; - const u8 Lp2 = state[(i + 6) & 7] >> 40; - const u8 Lp3 = state[(i + 5) & 7] >> 32; - const u8 Lp4 = state[(i + 4) & 7] >> 24; - const u8 Lp5 = state[(i + 3) & 7] >> 16; - const u8 Lp6 = state[(i + 2) & 7] >> 8; - const u8 Lp7 = state[(i + 1) & 7] >> 0; + const u8 Lp0 = v8h_from_v64_S (state[(i + 8) & 7]); + const u8 Lp1 = v8g_from_v64_S (state[(i + 7) & 7]); + const u8 Lp2 = v8f_from_v64_S (state[(i + 6) & 7]); + const u8 Lp3 = v8e_from_v64_S (state[(i + 5) & 7]); + const u8 Lp4 = v8d_from_v64_S (state[(i + 4) & 7]); + const u8 Lp5 = v8c_from_v64_S (state[(i + 3) & 7]); + const u8 Lp6 = v8b_from_v64_S (state[(i + 2) & 7]); + const u8 Lp7 = v8a_from_v64_S (state[(i + 1) & 7]); const u64 X0 = BOX64_S (s_MT, 0, Lp0); const u64 X1 = BOX64_S (s_MT, 1, Lp1); @@ -1861,14 +1861,14 @@ DECLSPEC void whirlpool_transform_vector (const u32x *w0, const u32x *w1, const #endif for (int i = 0; i < 8; i++) { - const u8x Lp0 = K[(i + 8) & 7] >> 56; - const u8x Lp1 = K[(i + 7) & 7] >> 48; - const u8x Lp2 = K[(i + 6) & 7] >> 40; - const u8x Lp3 = K[(i + 5) & 7] >> 32; - const u8x Lp4 = K[(i + 4) & 7] >> 24; - const u8x Lp5 = K[(i + 3) & 7] >> 16; - const u8x Lp6 = K[(i + 2) & 7] >> 8; - const u8x Lp7 = K[(i + 1) & 7] >> 0; + const u8x Lp0 = v8h_from_v64 (K[(i + 8) & 7]); + const u8x Lp1 = v8g_from_v64 (K[(i + 7) & 7]); + const u8x Lp2 = v8f_from_v64 (K[(i + 6) & 7]); + const u8x Lp3 = v8e_from_v64 (K[(i + 5) & 7]); + const u8x Lp4 = v8d_from_v64 (K[(i + 4) & 7]); + const u8x Lp5 = v8c_from_v64 (K[(i + 3) & 7]); + const u8x Lp6 = v8b_from_v64 (K[(i + 2) & 7]); + const u8x Lp7 = v8a_from_v64 (K[(i + 1) & 7]); const u64x X0 = BOX64 (s_MT, 0, Lp0); const u64x X1 = BOX64 (s_MT, 1, Lp1); @@ -1905,14 +1905,14 @@ DECLSPEC void whirlpool_transform_vector (const u32x *w0, const u32x *w1, const #endif for (int i = 0; i < 8; i++) { - const u8x Lp0 = state[(i + 8) & 7] >> 56; - const u8x Lp1 = state[(i + 7) & 7] >> 48; - const u8x Lp2 = state[(i + 6) & 7] >> 40; - const u8x Lp3 = state[(i + 5) & 7] >> 32; - const u8x Lp4 = state[(i + 4) & 7] >> 24; - const u8x Lp5 = state[(i + 3) & 7] >> 16; - const u8x Lp6 = state[(i + 2) & 7] >> 8; - const u8x Lp7 = state[(i + 1) & 7] >> 0; + const u8x Lp0 = v8h_from_v64 (state[(i + 8) & 7]); + const u8x Lp1 = v8g_from_v64 (state[(i + 7) & 7]); + const u8x Lp2 = v8f_from_v64 (state[(i + 6) & 7]); + const u8x Lp3 = v8e_from_v64 (state[(i + 5) & 7]); + const u8x Lp4 = v8d_from_v64 (state[(i + 4) & 7]); + const u8x Lp5 = v8c_from_v64 (state[(i + 3) & 7]); + const u8x Lp6 = v8b_from_v64 (state[(i + 2) & 7]); + const u8x Lp7 = v8a_from_v64 (state[(i + 1) & 7]); const u64x X0 = BOX64 (s_MT, 0, Lp0); const u64x X1 = BOX64 (s_MT, 1, Lp1); diff --git a/tools/benchmark_deep.pl b/tools/benchmark_deep.pl index ee562403b..01e898839 100755 --- a/tools/benchmark_deep.pl +++ b/tools/benchmark_deep.pl @@ -16,7 +16,7 @@ my $default_mask = "?b?b?b?b?b?b?b"; my $result = "result.txt"; my $old_hashcat = 0; # requires to have ran with new hashcat before to create the hashfiles my $repeats = 1; -my $cpu_benchmark = 0; +my $cpu_benchmark = 1; print "\nHardware preparations... You may need to adjust some settings and probably can ignore some of the error\n\n";