From 6102347ab033d57d6e0bb063398a4923dbafe1dd Mon Sep 17 00:00:00 2001 From: Zgzorx Date: Wed, 22 Feb 2023 11:44:42 +0100 Subject: [PATCH] First performance optimization for SM3 hash on a0 and a1 optimized kernels --- OpenCL/m31100_a0-optimized.cl | 5 +++++ OpenCL/m31100_a1-optimized.cl | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/OpenCL/m31100_a0-optimized.cl b/OpenCL/m31100_a0-optimized.cl index b24143c08..0527670b3 100644 --- a/OpenCL/m31100_a0-optimized.cl +++ b/OpenCL/m31100_a0-optimized.cl @@ -213,6 +213,8 @@ KERNEL_FQ void m31100_s04 (KERN_ATTR_RULES ()) digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3] }; + const u32 d_rev = hc_rotr32_S (search[0], 9); + /** * loop */ @@ -322,6 +324,9 @@ KERNEL_FQ void m31100_s04 (KERN_ATTR_RULES ()) we_t = SM3_EXPAND(we_t, w5_t, wb_t, w1_t, w8_t); SM3_ROUND2(c, d, a, b, g, h, e, f, SM3_T58, wa_t, wa_t ^ we_t); wf_t = SM3_EXPAND(wf_t, w6_t, wc_t, w2_t, w9_t); SM3_ROUND2(b, c, d, a, f, g, h, e, SM3_T59, wb_t, wb_t ^ wf_t); w0_t = SM3_EXPAND(w0_t, w7_t, wd_t, w3_t, wa_t); SM3_ROUND2(a, b, c, d, e, f, g, h, SM3_T60, wc_t, wc_t ^ w0_t); + + if (MATCHES_NONE_VS (d, d_rev)) continue; + w1_t = SM3_EXPAND(w1_t, w8_t, we_t, w4_t, wb_t); SM3_ROUND2(d, a, b, c, h, e, f, g, SM3_T61, wd_t, wd_t ^ w1_t); w2_t = SM3_EXPAND(w2_t, w9_t, wf_t, w5_t, wc_t); SM3_ROUND2(c, d, a, b, g, h, e, f, SM3_T62, we_t, we_t ^ w2_t); w3_t = SM3_EXPAND(w3_t, wa_t, w0_t, w6_t, wd_t); SM3_ROUND2(b, c, d, a, f, g, h, e, SM3_T63, wf_t, wf_t ^ w3_t); diff --git a/OpenCL/m31100_a1-optimized.cl b/OpenCL/m31100_a1-optimized.cl index aaef407f0..e69ebc274 100644 --- a/OpenCL/m31100_a1-optimized.cl +++ b/OpenCL/m31100_a1-optimized.cl @@ -269,6 +269,8 @@ KERNEL_FQ void m31100_s04 (KERN_ATTR_BASIC ()) digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3] }; + const u32 d_rev = hc_rotr32_S (search[0], 9); + /** * loop */ @@ -436,6 +438,9 @@ KERNEL_FQ void m31100_s04 (KERN_ATTR_BASIC ()) we_t = SM3_EXPAND(we_t, w5_t, wb_t, w1_t, w8_t); SM3_ROUND2(c, d, a, b, g, h, e, f, SM3_T58, wa_t, wa_t ^ we_t); wf_t = SM3_EXPAND(wf_t, w6_t, wc_t, w2_t, w9_t); SM3_ROUND2(b, c, d, a, f, g, h, e, SM3_T59, wb_t, wb_t ^ wf_t); w0_t = SM3_EXPAND(w0_t, w7_t, wd_t, w3_t, wa_t); SM3_ROUND2(a, b, c, d, e, f, g, h, SM3_T60, wc_t, wc_t ^ w0_t); + + if (MATCHES_NONE_VS (d, d_rev)) continue; + w1_t = SM3_EXPAND(w1_t, w8_t, we_t, w4_t, wb_t); SM3_ROUND2(d, a, b, c, h, e, f, g, SM3_T61, wd_t, wd_t ^ w1_t); w2_t = SM3_EXPAND(w2_t, w9_t, wf_t, w5_t, wc_t); SM3_ROUND2(c, d, a, b, g, h, e, f, SM3_T62, we_t, we_t ^ w2_t); w3_t = SM3_EXPAND(w3_t, wa_t, w0_t, w6_t, wd_t); SM3_ROUND2(b, c, d, a, f, g, h, e, SM3_T63, wf_t, wf_t ^ w3_t);