From 19d3696bd5da232f3f6432c48f5f82075114eb5e Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Sat, 22 May 2021 11:14:51 +0200
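Subject: [PATCH] UTF8-to-UTF16: Replaced naive UTF8 to UTF16 conversion with
 true conversion for RAR3, AES Crypt, MultiBit HD (scrypt) and Umbraco
 HMAC-SHA1

The m24800 pure kernels (a0, a1, a3) no longer expand the password with
make_utf16beN(). They now run it through hc_enc_init()/hc_enc_next() and
pass the returned length to sha1_hmac_init_swap()/sha1_hmac_update_swap()
instead of assuming the UTF-16 output is always twice the input length.

The a3 kernel is switched from vector to scalar code as part of this
change: NEW_SIMD_CODE is commented out, inc_scalar.cl replaces inc_simd.cl,
and the COMPARE_*_SCALAR macros are used for the digest comparison.
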
---
 OpenCL/m24800_a0-pure.cl | 42 ++++++++-------------
 OpenCL/m24800_a1-pure.cl | 44 ++++++++++++----------
 OpenCL/m24800_a3-pure.cl | 80 +++++++++++++++++++++-------------------
 docs/changes.txt         |  2 +-
 4 files changed, 83 insertions(+), 85 deletions(-)

diff --git a/OpenCL/m24800_a0-pure.cl b/OpenCL/m24800_a0-pure.cl
index 3c2674424..da2cbc112 100644
--- a/OpenCL/m24800_a0-pure.cl
+++ b/OpenCL/m24800_a0-pure.cl
@@ -43,26 +43,21 @@ KERNEL_FQ void m24800_mxx (KERN_ATTR_RULES ())
 
     tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
 
-    // swap endian
-    for (u32 i = 0, idx = 0; i < tmp.pw_len; i += 4, idx += 1)
-    {
-      tmp.i[idx] = hc_swap32 (tmp.i[idx]);
-    }
-
     u32 t[128] = { 0 };
 
-    // make it unicode.
-    for (u32 i = 0, idx = 0; i < tmp.pw_len; i += 16, idx += 4)
-    {
-      make_utf16beN (&tmp.i[idx], &t[(idx * 2) + 0], &t[(idx * 2) + 4]);
-    }
+    hc_enc_t hc_enc;
+
+    hc_enc_init (&hc_enc);
+
+    const u32 t_len = hc_enc_next (&hc_enc, tmp.i, tmp.pw_len, 256, t, sizeof (t));
 
     // hash time
+
     sha1_hmac_ctx_t ctx;
 
-    sha1_hmac_init (&ctx, t, tmp.pw_len * 2);
+    sha1_hmac_init_swap (&ctx, t, t_len);
 
-    sha1_hmac_update (&ctx, t, tmp.pw_len * 2);
+    sha1_hmac_update_swap (&ctx, t, t_len);
 
     sha1_hmac_final (&ctx);
 
@@ -114,26 +109,21 @@ KERNEL_FQ void m24800_sxx (KERN_ATTR_RULES ())
 
     tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
 
-    // swap endian
-    for (u32 i = 0, idx = 0; i < tmp.pw_len; i += 4, idx += 1)
-    {
-      tmp.i[idx] = hc_swap32 (tmp.i[idx]);
-    }
-
     u32 t[128] = { 0 };
 
-    // make it unicode.
-    for (u32 i = 0, idx = 0; i < tmp.pw_len; i += 16, idx += 4)
-    {
-      make_utf16beN (&tmp.i[idx], &t[(idx * 2) + 0], &t[(idx * 2) + 4]);
-    }
+    hc_enc_t hc_enc;
+
+    hc_enc_init (&hc_enc);
+
+    const u32 t_len = hc_enc_next (&hc_enc, tmp.i, tmp.pw_len, 256, t, sizeof (t));
 
     // hash time
+
     sha1_hmac_ctx_t ctx;
 
-    sha1_hmac_init (&ctx, t, tmp.pw_len * 2);
+    sha1_hmac_init_swap (&ctx, t, t_len);
 
-    sha1_hmac_update (&ctx, t, tmp.pw_len * 2);
+    sha1_hmac_update_swap (&ctx, t, t_len);
 
     sha1_hmac_final (&ctx);
 
diff --git a/OpenCL/m24800_a1-pure.cl b/OpenCL/m24800_a1-pure.cl
index c37c859ff..da321e287 100644
--- a/OpenCL/m24800_a1-pure.cl
+++ b/OpenCL/m24800_a1-pure.cl
@@ -35,7 +35,7 @@ KERNEL_FQ void m24800_mxx (KERN_ATTR_BASIC ())
 
   for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
   {
-    w[idx] = hc_swap32_S (pws[gid].i[idx]);
+    w[idx] = pws[gid].i[idx];
   }
 
   /**
@@ -53,10 +53,10 @@ KERNEL_FQ void m24800_mxx (KERN_ATTR_BASIC ())
     #endif
     for (int idx = 0; idx < 64; idx++)
     {
-      c[idx] = hc_swap32_S (combs_buf[il_pos].i[idx]);
+      c[idx] = combs_buf[il_pos].i[idx];
     }
 
-    switch_buffer_by_offset_1x64_be_S (c, pw_len);
+    switch_buffer_by_offset_1x64_le_S (c, pw_len);
 
     #ifdef _unroll
     #pragma unroll
@@ -68,17 +68,19 @@ KERNEL_FQ void m24800_mxx (KERN_ATTR_BASIC ())
 
     u32 t[128] = { 0 };
 
-    // make it unicode.
-    for (u32 i = 0, idx = 0; i < pw_len + comb_len; i += 16, idx += 4)
-    {
-      make_utf16beN (&c[idx], &t[(idx * 2) + 0], &t[(idx * 2) + 4]);
-    }
+    hc_enc_t hc_enc;
+
+    hc_enc_init (&hc_enc);
+
+    const u32 t_len = hc_enc_next (&hc_enc, c, pw_len + comb_len, 256, t, sizeof (t));
+
+    // hash time
 
     sha1_hmac_ctx_t ctx;
 
-    sha1_hmac_init (&ctx, t, (pw_len + comb_len) * 2);
+    sha1_hmac_init_swap (&ctx, t, t_len);
 
-    sha1_hmac_update (&ctx, t, (pw_len + comb_len) * 2);
+    sha1_hmac_update_swap (&ctx, t, t_len);
 
     sha1_hmac_final (&ctx);
 
@@ -124,7 +126,7 @@ KERNEL_FQ void m24800_sxx (KERN_ATTR_BASIC ())
 
   for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
   {
-    w[idx] = hc_swap32_S (pws[gid].i[idx]);
+    w[idx] = pws[gid].i[idx];
   }
 
   /**
@@ -142,10 +144,10 @@ KERNEL_FQ void m24800_sxx (KERN_ATTR_BASIC ())
     #endif
     for (int idx = 0; idx < 64; idx++)
     {
-      c[idx] = hc_swap32_S (combs_buf[il_pos].i[idx]);
+      c[idx] = combs_buf[il_pos].i[idx];
     }
 
-    switch_buffer_by_offset_1x64_be_S (c, pw_len);
+    switch_buffer_by_offset_1x64_le_S (c, pw_len);
 
     #ifdef _unroll
     #pragma unroll
@@ -157,17 +159,19 @@ KERNEL_FQ void m24800_sxx (KERN_ATTR_BASIC ())
 
     u32 t[128] = { 0 };
 
-    // make it unicode.
-    for (u32 i = 0, idx = 0; i < pw_len + comb_len; i += 16, idx += 4)
-    {
-      make_utf16beN (&c[idx], &t[(idx * 2) + 0], &t[(idx * 2) + 4]);
-    }
+    hc_enc_t hc_enc;
+
+    hc_enc_init (&hc_enc);
+
+    const u32 t_len = hc_enc_next (&hc_enc, c, pw_len + comb_len, 256, t, sizeof (t));
+
+    // hash time
 
     sha1_hmac_ctx_t ctx;
 
-    sha1_hmac_init (&ctx, t, (pw_len + comb_len) * 2);
+    sha1_hmac_init_swap (&ctx, t, t_len);
 
-    sha1_hmac_update (&ctx, t, (pw_len + comb_len) * 2);
+    sha1_hmac_update_swap (&ctx, t, t_len);
 
     sha1_hmac_final (&ctx);
 
diff --git a/OpenCL/m24800_a3-pure.cl b/OpenCL/m24800_a3-pure.cl
index 055e7f2e0..9d2bbc0ad 100644
--- a/OpenCL/m24800_a3-pure.cl
+++ b/OpenCL/m24800_a3-pure.cl
@@ -3,14 +3,14 @@
  * License.....: MIT
  */
 
-#define NEW_SIMD_CODE
+//#define NEW_SIMD_CODE
 
 #ifdef KERNEL_STATIC
 #include "inc_vendor.h"
 #include "inc_types.h"
 #include "inc_platform.cl"
 #include "inc_common.cl"
-#include "inc_simd.cl"
+#include "inc_scalar.cl"
 #include "inc_hash_sha1.cl"
 #endif
 
@@ -31,11 +31,11 @@ KERNEL_FQ void m24800_mxx (KERN_ATTR_VECTOR ())
 
   const u32 pw_len = pws[gid].pw_len;
 
-  u32x w[64] = { 0 };
+  u32 w[64] = { 0 };
 
   for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
   {
-    w[idx] = pws[gid].i[idx];
+    w[idx] = hc_swap32_S (pws[gid].i[idx]);
   }
 
   /**
@@ -48,32 +48,34 @@ KERNEL_FQ void m24800_mxx (KERN_ATTR_VECTOR ())
   {
     const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
 
-    const u32x w0 = w0l | w0r;
+    const u32x w0 = w0l | hc_swap32_S (w0r);
 
     w[0] = w0;
 
-    u32x t[128] = { 0 };
+    u32 t[128] = { 0 };
 
-    // make it unicode.
-    for (u32 i = 0, idx = 0; i < pw_len; i += 16, idx += 4)
-    {
-      make_utf16beN (&w[idx], &t[(idx * 2) + 0], &t[(idx * 2) + 4]);
-    }
+    hc_enc_t hc_enc;
 
-    sha1_hmac_ctx_vector_t ctx;
+    hc_enc_init (&hc_enc);
 
-    sha1_hmac_init_vector (&ctx, t, pw_len * 2);
+    const u32 t_len = hc_enc_next (&hc_enc, w, pw_len, 256, t, sizeof (t));
 
-    sha1_hmac_update_vector (&ctx, t, pw_len * 2);
+    // hash time
 
-    sha1_hmac_final_vector (&ctx);
+    sha1_hmac_ctx_t ctx;
 
-    const u32x r0 = ctx.opad.h[DGST_R0];
-    const u32x r1 = ctx.opad.h[DGST_R1];
-    const u32x r2 = ctx.opad.h[DGST_R2];
-    const u32x r3 = ctx.opad.h[DGST_R3];
+    sha1_hmac_init_swap (&ctx, t, t_len);
 
-    COMPARE_M_SIMD (r0, r1, r2, r3);
+    sha1_hmac_update_swap (&ctx, t, t_len);
+
+    sha1_hmac_final (&ctx);
+
+    const u32 r0 = ctx.opad.h[DGST_R0];
+    const u32 r1 = ctx.opad.h[DGST_R1];
+    const u32 r2 = ctx.opad.h[DGST_R2];
+    const u32 r3 = ctx.opad.h[DGST_R3];
+
+    COMPARE_M_SCALAR (r0, r1, r2, r3);
   }
 }
 
@@ -106,11 +108,11 @@ KERNEL_FQ void m24800_sxx (KERN_ATTR_VECTOR ())
 
   const u32 pw_len = pws[gid].pw_len;
 
-  u32x w[64] = { 0 };
+  u32 w[64] = { 0 };
 
   for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
   {
-    w[idx] = pws[gid].i[idx];
+    w[idx] = hc_swap32_S (pws[gid].i[idx]);
   }
 
   /**
@@ -123,31 +125,33 @@ KERNEL_FQ void m24800_sxx (KERN_ATTR_VECTOR ())
   {
     const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
 
-    const u32x w0 = w0l | w0r;
+    const u32x w0 = w0l | hc_swap32_S (w0r);
 
     w[0] = w0;
 
-    u32x t[128] = { 0 };
+    u32 t[128] = { 0 };
+
+    hc_enc_t hc_enc;
+
+    hc_enc_init (&hc_enc);
+
+    const u32 t_len = hc_enc_next (&hc_enc, w, pw_len, 256, t, sizeof (t));
 
-    // make it unicode.
-    for (u32 i = 0, idx = 0; i < pw_len; i += 16, idx += 4)
-    {
-      make_utf16beN (&w[idx], &t[(idx * 2) + 0], &t[(idx * 2) + 4]);
-    }
+    // hash time
 
-    sha1_hmac_ctx_vector_t ctx;
+    sha1_hmac_ctx_t ctx;
 
-    sha1_hmac_init_vector (&ctx, t, pw_len * 2);
+    sha1_hmac_init_swap (&ctx, t, t_len);
 
-    sha1_hmac_update_vector (&ctx, t, pw_len * 2);
+    sha1_hmac_update_swap (&ctx, t, t_len);
 
-    sha1_hmac_final_vector (&ctx);
+    sha1_hmac_final (&ctx);
 
-    const u32x r0 = ctx.opad.h[DGST_R0];
-    const u32x r1 = ctx.opad.h[DGST_R1];
-    const u32x r2 = ctx.opad.h[DGST_R2];
-    const u32x r3 = ctx.opad.h[DGST_R3];
+    const u32 r0 = ctx.opad.h[DGST_R0];
+    const u32 r1 = ctx.opad.h[DGST_R1];
+    const u32 r2 = ctx.opad.h[DGST_R2];
+    const u32 r3 = ctx.opad.h[DGST_R3];
 
-    COMPARE_S_SIMD (r0, r1, r2, r3);
+    COMPARE_S_SCALAR (r0, r1, r2, r3);
   }
 }
diff --git a/docs/changes.txt b/docs/changes.txt
index 2d2acc080..38e4b663a 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -16,7 +16,7 @@
 - CUDA Backend: Do not warn about missing CUDA SDK installation if --stdout is used
 - Performance Monitor: Add -S as a user suggestion to improve cracking performance in specific attack configurations
 - Status Screen: Show currently running kernel type (pure, optimized) and generator type (host, device)
-- UTF8-to-UTF16: Replaced naive UTF8 to UTF16 conversion with true conversion for RAR3, AES Crypt and MultiBit HD (scrypt)
+- UTF8-to-UTF16: Replaced naive UTF8 to UTF16 conversion with true conversion for RAR3, AES Crypt, MultiBit HD (scrypt) and Umbraco HMAC-SHA1
 - AES Crypt Plugin: Reduced max password length from 256 to 128 which improved performance by 22%
 
 ##