optimizing 21200 pure kernels: move sha1(salt) outside the loop

2025-04-05 09:15:42 +00:00 · 2019-08-01 03:10:47 +02:00 · 2019-08-01 03:10:47 +02:00 · 7c08184ea5
commit 7c08184ea5
parent 9e9adfcd7d
3 changed files with 84 additions and 84 deletions
--- a/OpenCL/m21200_a0-pure.cl
+++ b/OpenCL/m21200_a0-pure.cl
@@ -73,6 +73,20 @@ KERNEL_FQ void m21200_mxx (KERN_ATTR_RULES ())
    s[idx] = salt_bufs[salt_pos].salt_buf[idx];
  }

+  sha1_ctx_t ctx0;
+
+  sha1_init (&ctx0);
+
+  sha1_update_swap (&ctx0, s, salt_len);
+
+  sha1_final (&ctx0);
+
+  const u32 a0 = ctx0.h[0];
+  const u32 b0 = ctx0.h[1];
+  const u32 c0 = ctx0.h[2];
+  const u32 d0 = ctx0.h[3];
+  const u32 e0 = ctx0.h[4];
+
  /**
   * loop
   */
@@ -88,20 +102,6 @@ KERNEL_FQ void m21200_mxx (KERN_ATTR_RULES ())

    tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);

-    sha1_ctx_t ctx0;
-
-    sha1_init (&ctx0);
-
-    sha1_update_swap (&ctx0, s, salt_len);
-
-    sha1_final (&ctx0);
-
-    const u32 a0 = ctx0.h[0];
-    const u32 b0 = ctx0.h[1];
-    const u32 c0 = ctx0.h[2];
-    const u32 d0 = ctx0.h[3];
-    const u32 e0 = ctx0.h[4];
-
    md5_ctx_t ctx1;

    md5_init (&ctx1);
@@ -244,6 +244,20 @@ KERNEL_FQ void m21200_sxx (KERN_ATTR_RULES ())
    s[idx] = salt_bufs[salt_pos].salt_buf[idx];
  }

+  sha1_ctx_t ctx0;
+
+  sha1_init (&ctx0);
+
+  sha1_update_swap (&ctx0, s, salt_len);
+
+  sha1_final (&ctx0);
+
+  const u32 a0 = ctx0.h[0];
+  const u32 b0 = ctx0.h[1];
+  const u32 c0 = ctx0.h[2];
+  const u32 d0 = ctx0.h[3];
+  const u32 e0 = ctx0.h[4];
+
  /**
   * loop
   */
@@ -259,20 +273,6 @@ KERNEL_FQ void m21200_sxx (KERN_ATTR_RULES ())

    tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);

-    sha1_ctx_t ctx0;
-
-    sha1_init (&ctx0);
-
-    sha1_update_swap (&ctx0, s, salt_len);
-
-    sha1_final (&ctx0);
-
-    const u32 a0 = ctx0.h[0];
-    const u32 b0 = ctx0.h[1];
-    const u32 c0 = ctx0.h[2];
-    const u32 d0 = ctx0.h[3];
-    const u32 e0 = ctx0.h[4];
-
    md5_ctx_t ctx1;

    md5_init (&ctx1);
--- a/OpenCL/m21200_a1-pure.cl
+++ b/OpenCL/m21200_a1-pure.cl
@@ -69,6 +69,20 @@ KERNEL_FQ void m21200_mxx (KERN_ATTR_BASIC ())
    s[idx] = salt_bufs[salt_pos].salt_buf[idx];
  }

+  sha1_ctx_t ctx0;
+
+  sha1_init (&ctx0);
+
+  sha1_update_swap (&ctx0, s, salt_len);
+
+  sha1_final (&ctx0);
+
+  const u32 a0 = ctx0.h[0];
+  const u32 b0 = ctx0.h[1];
+  const u32 c0 = ctx0.h[2];
+  const u32 d0 = ctx0.h[3];
+  const u32 e0 = ctx0.h[4];
+
  md5_ctx_t ctx11;

  md5_init (&ctx11);
@@ -86,20 +100,6 @@ KERNEL_FQ void m21200_mxx (KERN_ATTR_BASIC ())
 	
  for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
  {
-    sha1_ctx_t ctx0;
-
-    sha1_init (&ctx0);
-
-    sha1_update_swap (&ctx0, s, salt_len);
-
-    sha1_final (&ctx0);
-
-    const u32 a0 = ctx0.h[0];
-    const u32 b0 = ctx0.h[1];
-    const u32 c0 = ctx0.h[2];
-    const u32 d0 = ctx0.h[3];
-    const u32 e0 = ctx0.h[4];
-
    md5_ctx_t ctx1 = ctx11;

    md5_update_global (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
@@ -238,6 +238,20 @@ KERNEL_FQ void m21200_sxx (KERN_ATTR_BASIC ())
    s[idx] = salt_bufs[salt_pos].salt_buf[idx];
  }

+  sha1_ctx_t ctx0;
+
+  sha1_init (&ctx0);
+
+  sha1_update_swap (&ctx0, s, salt_len);
+
+  sha1_final (&ctx0);
+
+  const u32 a0 = ctx0.h[0];
+  const u32 b0 = ctx0.h[1];
+  const u32 c0 = ctx0.h[2];
+  const u32 d0 = ctx0.h[3];
+  const u32 e0 = ctx0.h[4];
+
  md5_ctx_t ctx11;

  md5_init (&ctx11);
@@ -255,20 +269,6 @@ KERNEL_FQ void m21200_sxx (KERN_ATTR_BASIC ())

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
  {
-    sha1_ctx_t ctx0;
-
-    sha1_init (&ctx0);
-
-    sha1_update_swap (&ctx0, s, salt_len);
-
-    sha1_final (&ctx0);
-
-    const u32 a0 = ctx0.h[0];
-    const u32 b0 = ctx0.h[1];
-    const u32 c0 = ctx0.h[2];
-    const u32 d0 = ctx0.h[3];
-    const u32 e0 = ctx0.h[4];
-
    md5_ctx_t ctx1 = ctx11;

    md5_update_global (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
--- a/OpenCL/m21200_a3-pure.cl
+++ b/OpenCL/m21200_a3-pure.cl
@@ -78,6 +78,20 @@ KERNEL_FQ void m21200_mxx (KERN_ATTR_VECTOR ())
    s[idx] = salt_bufs[salt_pos].salt_buf[idx];
  }

+  sha1_ctx_t ctx0;
+
+  sha1_init (&ctx0);
+
+  sha1_update_swap (&ctx0, s, salt_len);
+
+  sha1_final (&ctx0);
+
+  const u32x a0 = ctx0.h[0];
+  const u32x b0 = ctx0.h[1];
+  const u32x c0 = ctx0.h[2];
+  const u32x d0 = ctx0.h[3];
+  const u32x e0 = ctx0.h[4];
+
  /**
   * loop
   */
@@ -97,20 +111,6 @@ KERNEL_FQ void m21200_mxx (KERN_ATTR_VECTOR ())

    w[0] = w0;

-    sha1_ctx_vector_t ctx0;
-
-    sha1_init_vector (&ctx0);
-
-    sha1_update_vector_swap (&ctx0, s, salt_len);
-
-    sha1_final_vector (&ctx0);
-
-    const u32x a0 = ctx0.h[0];
-    const u32x b0 = ctx0.h[1];
-    const u32x c0 = ctx0.h[2];
-    const u32x d0 = ctx0.h[3];
-    const u32x e0 = ctx0.h[4];
-
    md5_ctx_vector_t ctx1;

    md5_init_vector (&ctx1);
@@ -260,6 +260,20 @@ KERNEL_FQ void m21200_sxx (KERN_ATTR_VECTOR ())
    s[idx] = salt_bufs[salt_pos].salt_buf[idx];
  }

+  sha1_ctx_t ctx0;
+
+  sha1_init (&ctx0);
+
+  sha1_update_swap (&ctx0, s, salt_len);
+
+  sha1_final (&ctx0);
+
+  const u32x a0 = ctx0.h[0];
+  const u32x b0 = ctx0.h[1];
+  const u32x c0 = ctx0.h[2];
+  const u32x d0 = ctx0.h[3];
+  const u32x e0 = ctx0.h[4];
+
  /**
   * loop
   */
@@ -279,20 +293,6 @@ KERNEL_FQ void m21200_sxx (KERN_ATTR_VECTOR ())

    w[0] = w0;

-    sha1_ctx_vector_t ctx0;
-
-    sha1_init_vector (&ctx0);
-
-    sha1_update_vector_swap (&ctx0, s, salt_len);
-
-    sha1_final_vector (&ctx0);
-
-    const u32x a0 = ctx0.h[0];
-    const u32x b0 = ctx0.h[1];
-    const u32x c0 = ctx0.h[2];
-    const u32x d0 = ctx0.h[3];
-    const u32x e0 = ctx0.h[4];
-
    md5_ctx_vector_t ctx1;

    md5_init_vector (&ctx1);