From a0de388bb92cf832ee513a207991379805d9bab6 Mon Sep 17 00:00:00 2001
From: Flaggx1 <bigbot@gmail.com>
Date: Sun, 21 May 2023 00:05:08 -0400
Subject: [PATCH 01/83] Increase number of user-defined charsets from 4 to 9

This feature change increases the number of user-defined charsets from a
maximum of 4 to 9. This enables more advanced and efficient attacks in
certain cases.

---
 include/types.h    | 12 ++++++-
 src/brain.c        | 71 +++++++++++++++++++++++++++++++++++++++++
 src/mpsp.c         | 78 ++++++++++++++++++++++++++++++++++++++++++++--
 src/status.c       | 19 +++++++++--
 src/usage.c        |  5 +++
 src/user_options.c | 58 +++++++++++++++++++++++++++++++---
 6 files changed, 234 insertions(+), 9 deletions(-)

diff --git a/include/types.h b/include/types.h
index 6fd4c4553..d059ab416 100644
--- a/include/types.h
+++ b/include/types.h
@@ -757,6 +757,11 @@ typedef enum user_options_map
   IDX_CUSTOM_CHARSET_2          = '2',
   IDX_CUSTOM_CHARSET_3          = '3',
   IDX_CUSTOM_CHARSET_4          = '4',
+  IDX_CUSTOM_CHARSET_5          = '5',
+  IDX_CUSTOM_CHARSET_6          = '6',
+  IDX_CUSTOM_CHARSET_7          = '7',
+  IDX_CUSTOM_CHARSET_8          = '8',
+  IDX_CUSTOM_CHARSET_9          = '9',
   IDX_DEBUG_FILE                = 0xff12,
   IDX_DEBUG_MODE                = 0xff13,
   IDX_DEPRECATED_CHECK_DISABLE  = 0xff14,
@@ -2377,7 +2382,7 @@ typedef struct user_options
   char        *brain_session_whitelist;
   #endif
   char        *cpu_affinity;
-  char        *custom_charset_4;
+  char        *custom_charset_9;
   char        *debug_file;
   char        *induction_dir;
   char        *keyboard_layout_mapping;
@@ -2396,6 +2401,11 @@ typedef struct user_options
   const char  *custom_charset_1;
   const char  *custom_charset_2;
   const char  *custom_charset_3;
+  const char  *custom_charset_4;
+  const char  *custom_charset_5;
+  const char  *custom_charset_6;
+  const char  *custom_charset_7;
+  const char  *custom_charset_8;
   const char  *encoding_from;
   const char  *encoding_to;
   const char  *rule_buf_l;
diff --git a/src/brain.c b/src/brain.c
index bd97d9695..480170bb1 100644
--- a/src/brain.c
+++ b/src/brain.c
@@ -345,7 +345,43 @@ u32 brain_compute_attack (hashcat_ctx_t *hashcat_ctx)
 
       XXH64_update (state, custom_charset_4, strlen (custom_charset_4));
     }
+
+    if (user_options->custom_charset_5)
+    {
+      const char *custom_charset_5 = user_options->custom_charset_5;
+
+      XXH64_update (state, custom_charset_5, strlen (custom_charset_5));
+    }
+
+    if (user_options->custom_charset_6)
+    {
+      const char *custom_charset_6 = user_options->custom_charset_6;
+
+      XXH64_update (state, custom_charset_6, strlen (custom_charset_6));
+    }
+
+    if (user_options->custom_charset_7)
+    {
+      const char *custom_charset_7 = user_options->custom_charset_7;
+
+      XXH64_update (state, custom_charset_7, strlen (custom_charset_7));
+    }
+
+    if (user_options->custom_charset_8)
+    {
+      const char *custom_charset_8 = user_options->custom_charset_8;
+
+      XXH64_update (state, custom_charset_8, strlen (custom_charset_8));
+    }
+
+    if (user_options->custom_charset_9)
+    {
+      const char *custom_charset_9 = user_options->custom_charset_9;
+
+      XXH64_update (state, custom_charset_9, strlen (custom_charset_9));
+    }
   }
+
   else if (user_options->attack_mode == ATTACK_MODE_HYBRID1)
   {
     const u64 wordlist_hash = brain_compute_attack_wordlist (straight_ctx->dict);
@@ -405,6 +441,41 @@ u32 brain_compute_attack (hashcat_ctx_t *hashcat_ctx)
       XXH64_update (state, custom_charset_4, strlen (custom_charset_4));
     }
 
+    if (user_options->custom_charset_5)
+    {
+      const char *custom_charset_5 = user_options->custom_charset_5;
+
+      XXH64_update (state, custom_charset_5, strlen (custom_charset_5));
+    }
+
+    if (user_options->custom_charset_6)
+    {
+      const char *custom_charset_6 = user_options->custom_charset_6;
+
+      XXH64_update (state, custom_charset_6, strlen (custom_charset_6));
+    }
+
+    if (user_options->custom_charset_7)
+    {
+      const char *custom_charset_7 = user_options->custom_charset_7;
+
+      XXH64_update (state, custom_charset_7, strlen (custom_charset_7));
+    }
+
+    if (user_options->custom_charset_8)
+    {
+      const char *custom_charset_8 = user_options->custom_charset_8;
+
+      XXH64_update (state, custom_charset_8, strlen (custom_charset_8));
+    }
+
+    if (user_options->custom_charset_9)
+    {
+      const char *custom_charset_9 = user_options->custom_charset_9;
+
+      XXH64_update (state, custom_charset_9, strlen (custom_charset_9));
+    }
+
     const int hex_wordlist = user_options->hex_wordlist;
 
     XXH64_update (state, &hex_wordlist, sizeof (hex_wordlist));
diff --git a/src/mpsp.c b/src/mpsp.c
index ba5511616..8e84cdba0 100644
--- a/src/mpsp.c
+++ b/src/mpsp.c
@@ -18,7 +18,7 @@
 
 static const char *const DEF_MASK = "?1?2?2?2?2?2?2?3?3?3?3?d?d?d?d";
 
-#define MAX_MFS 5 // 4*charset, 1*mask
+#define MAX_MFS 10 // 9*charset, 1*mask
 
 static int sp_comp_val (const void *p1, const void *p2)
 {
@@ -314,6 +314,21 @@ static int mp_expand (hashcat_ctx_t *hashcat_ctx, const char *in_buf, size_t in_
         case '4': if (mp_usr[3].cs_len == 0) { event_log_error (hashcat_ctx, "Custom-charset 4 is undefined."); return -1; }
                   rc = mp_add_cs_buf (hashcat_ctx, mp_usr[3].cs_buf, mp_usr[3].cs_len, mp_usr, mp_usr_offset);
                   break;
+        case '5': if (mp_usr[4].cs_len == 0) { event_log_error (hashcat_ctx, "Custom-charset 5 is undefined."); return -1; }
+                  rc = mp_add_cs_buf (hashcat_ctx, mp_usr[4].cs_buf, mp_usr[4].cs_len, mp_usr, mp_usr_offset);
+                  break;
+        case '6': if (mp_usr[5].cs_len == 0) { event_log_error (hashcat_ctx, "Custom-charset 6 is undefined."); return -1; }
+                  rc = mp_add_cs_buf (hashcat_ctx, mp_usr[5].cs_buf, mp_usr[5].cs_len, mp_usr, mp_usr_offset);
+                  break;
+        case '7': if (mp_usr[6].cs_len == 0) { event_log_error (hashcat_ctx, "Custom-charset 7 is undefined."); return -1; }
+                  rc = mp_add_cs_buf (hashcat_ctx, mp_usr[6].cs_buf, mp_usr[6].cs_len, mp_usr, mp_usr_offset);
+                  break;
+        case '8': if (mp_usr[7].cs_len == 0) { event_log_error (hashcat_ctx, "Custom-charset 8 is undefined."); return -1; }
+                  rc = mp_add_cs_buf (hashcat_ctx, mp_usr[7].cs_buf, mp_usr[7].cs_len, mp_usr, mp_usr_offset);
+                  break;
+        case '9': if (mp_usr[8].cs_len == 0) { event_log_error (hashcat_ctx, "Custom-charset 9 is undefined."); return -1; }
+                  rc = mp_add_cs_buf (hashcat_ctx, mp_usr[8].cs_buf, mp_usr[8].cs_len, mp_usr, mp_usr_offset);
+                  break;
         case '?': rc = mp_add_cs_buf (hashcat_ctx, &p0, 1, mp_usr, mp_usr_offset);
                   break;
         default:  event_log_error (hashcat_ctx, "Syntax error in mask: %s", in_buf);
@@ -427,6 +442,21 @@ static int mp_gen_css (hashcat_ctx_t *hashcat_ctx, char *mask_buf, size_t mask_l
         case '4': if (mp_usr[3].cs_len == 0) { event_log_error (hashcat_ctx, "Custom-charset 4 is undefined."); return -1; }
                   rc = mp_add_cs_buf (hashcat_ctx, mp_usr[3].cs_buf, mp_usr[3].cs_len, css_buf, css_pos);
                   break;
+        case '5': if (mp_usr[4].cs_len == 0) { event_log_error (hashcat_ctx, "Custom-charset 5 is undefined."); return -1; }
+                  rc = mp_add_cs_buf (hashcat_ctx, mp_usr[4].cs_buf, mp_usr[4].cs_len, css_buf, css_pos);
+                  break;
+        case '6': if (mp_usr[5].cs_len == 0) { event_log_error (hashcat_ctx, "Custom-charset 6 is undefined."); return -1; }
+                  rc = mp_add_cs_buf (hashcat_ctx, mp_usr[5].cs_buf, mp_usr[5].cs_len, css_buf, css_pos);
+                  break;
+        case '7': if (mp_usr[6].cs_len == 0) { event_log_error (hashcat_ctx, "Custom-charset 7 is undefined."); return -1; }
+                  rc = mp_add_cs_buf (hashcat_ctx, mp_usr[6].cs_buf, mp_usr[6].cs_len, css_buf, css_pos);
+                  break;
+        case '8': if (mp_usr[7].cs_len == 0) { event_log_error (hashcat_ctx, "Custom-charset 8 is undefined."); return -1; }
+                  rc = mp_add_cs_buf (hashcat_ctx, mp_usr[7].cs_buf, mp_usr[7].cs_len, css_buf, css_pos);
+                  break;
+        case '9': if (mp_usr[8].cs_len == 0) { event_log_error (hashcat_ctx, "Custom-charset 9 is undefined."); return -1; }
+                  rc = mp_add_cs_buf (hashcat_ctx, mp_usr[8].cs_buf, mp_usr[8].cs_len, css_buf, css_pos);
+                  break;
         case '?': rc = mp_add_cs_buf (hashcat_ctx, &chr, 1, css_buf, css_pos);
                   break;
         default:  event_log_error (hashcat_ctx, "Syntax error in mask: %s", mask_buf);
@@ -1416,7 +1446,7 @@ int mask_ctx_init (hashcat_ctx_t *hashcat_ctx)
   mask_ctx->enabled = true;
 
   mask_ctx->mp_sys  = (cs_t *) hccalloc (8, sizeof (cs_t));
-  mask_ctx->mp_usr  = (cs_t *) hccalloc (4, sizeof (cs_t));
+  mask_ctx->mp_usr  = (cs_t *) hccalloc (9, sizeof (cs_t));
 
   mask_ctx->css_buf = (cs_t *) hccalloc (256, sizeof (cs_t));
   mask_ctx->css_cnt = 0;
@@ -1443,6 +1473,11 @@ int mask_ctx_init (hashcat_ctx_t *hashcat_ctx)
   if (user_options->custom_charset_2) { if (mp_setup_usr (hashcat_ctx, mask_ctx->mp_sys, mask_ctx->mp_usr, user_options->custom_charset_2, 1) == -1) return -1; }
   if (user_options->custom_charset_3) { if (mp_setup_usr (hashcat_ctx, mask_ctx->mp_sys, mask_ctx->mp_usr, user_options->custom_charset_3, 2) == -1) return -1; }
   if (user_options->custom_charset_4) { if (mp_setup_usr (hashcat_ctx, mask_ctx->mp_sys, mask_ctx->mp_usr, user_options->custom_charset_4, 3) == -1) return -1; }
+  if (user_options->custom_charset_5) { if (mp_setup_usr (hashcat_ctx, mask_ctx->mp_sys, mask_ctx->mp_usr, user_options->custom_charset_5, 4) == -1) return -1; }
+  if (user_options->custom_charset_6) { if (mp_setup_usr (hashcat_ctx, mask_ctx->mp_sys, mask_ctx->mp_usr, user_options->custom_charset_6, 5) == -1) return -1; }
+  if (user_options->custom_charset_7) { if (mp_setup_usr (hashcat_ctx, mask_ctx->mp_sys, mask_ctx->mp_usr, user_options->custom_charset_7, 6) == -1) return -1; }
+  if (user_options->custom_charset_8) { if (mp_setup_usr (hashcat_ctx, mask_ctx->mp_sys, mask_ctx->mp_usr, user_options->custom_charset_8, 7) == -1) return -1; }
+  if (user_options->custom_charset_9) { if (mp_setup_usr (hashcat_ctx, mask_ctx->mp_sys, mask_ctx->mp_usr, user_options->custom_charset_9, 8) == -1) return -1; }
 
   if (user_options->benchmark == true)
   {
@@ -1739,6 +1774,10 @@ int mask_ctx_parse_maskfile (hashcat_ctx_t *hashcat_ctx)
   mfs_buf[2].mf_len = 0;
   mfs_buf[3].mf_len = 0;
   mfs_buf[4].mf_len = 0;
+  mfs_buf[5].mf_len = 0;
+  mfs_buf[6].mf_len = 0;
+  mfs_buf[7].mf_len = 0;
+  mfs_buf[8].mf_len = 0;
 
   size_t mfs_cnt = 0;
 
@@ -1796,11 +1835,21 @@ int mask_ctx_parse_maskfile (hashcat_ctx_t *hashcat_ctx)
   user_options->custom_charset_2 = NULL;
   user_options->custom_charset_3 = NULL;
   user_options->custom_charset_4 = NULL;
+  user_options->custom_charset_5 = NULL;
+  user_options->custom_charset_6 = NULL;
+  user_options->custom_charset_7 = NULL;
+  user_options->custom_charset_8 = NULL;
+  user_options->custom_charset_9 = NULL;
 
   mp_reset_usr (mask_ctx->mp_usr, 0);
   mp_reset_usr (mask_ctx->mp_usr, 1);
   mp_reset_usr (mask_ctx->mp_usr, 2);
   mp_reset_usr (mask_ctx->mp_usr, 3);
+  mp_reset_usr (mask_ctx->mp_usr, 4);
+  mp_reset_usr (mask_ctx->mp_usr, 5);
+  mp_reset_usr (mask_ctx->mp_usr, 6);
+  mp_reset_usr (mask_ctx->mp_usr, 7);
+  mp_reset_usr (mask_ctx->mp_usr, 8);
 
   for (size_t i = 0; i < mfs_cnt; i++)
   {
@@ -1825,6 +1874,31 @@ int mask_ctx_parse_maskfile (hashcat_ctx_t *hashcat_ctx)
         user_options->custom_charset_4 = mfs_buf[3].mf_buf;
         mp_setup_usr (hashcat_ctx, mask_ctx->mp_sys, mask_ctx->mp_usr, user_options->custom_charset_4, 3);
         break;
+
+      case 4:
+        user_options->custom_charset_5 = mfs_buf[4].mf_buf;
+        mp_setup_usr (hashcat_ctx, mask_ctx->mp_sys, mask_ctx->mp_usr, user_options->custom_charset_5, 4);
+        break;
+
+      case 5:
+        user_options->custom_charset_6 = mfs_buf[5].mf_buf;
+        mp_setup_usr (hashcat_ctx, mask_ctx->mp_sys, mask_ctx->mp_usr, user_options->custom_charset_6, 5);
+        break;
+
+      case 6:
+        user_options->custom_charset_7 = mfs_buf[6].mf_buf;
+        mp_setup_usr (hashcat_ctx, mask_ctx->mp_sys, mask_ctx->mp_usr, user_options->custom_charset_7, 6);
+        break;
+
+      case 7:
+        user_options->custom_charset_8 = mfs_buf[7].mf_buf;
+        mp_setup_usr (hashcat_ctx, mask_ctx->mp_sys, mask_ctx->mp_usr, user_options->custom_charset_8, 7);
+        break;
+
+      case 8:
+        user_options->custom_charset_9 = mfs_buf[8].mf_buf;
+        mp_setup_usr (hashcat_ctx, mask_ctx->mp_sys, mask_ctx->mp_usr, user_options->custom_charset_9, 8);
+        break;
     }
   }
 
diff --git a/src/status.c b/src/status.c
index 4e964fd3b..ccdf520f2 100644
--- a/src/status.c
+++ b/src/status.c
@@ -394,6 +394,11 @@ int status_get_guess_mode (const hashcat_ctx_t *hashcat_ctx)
   if (user_options->custom_charset_2) has_mask_cs = true;
   if (user_options->custom_charset_3) has_mask_cs = true;
   if (user_options->custom_charset_4) has_mask_cs = true;
+  if (user_options->custom_charset_5) has_mask_cs = true;
+  if (user_options->custom_charset_6) has_mask_cs = true;
+  if (user_options->custom_charset_7) has_mask_cs = true;
+  if (user_options->custom_charset_8) has_mask_cs = true;
+  if (user_options->custom_charset_9) has_mask_cs = true;
 
   if ((user_options->attack_mode == ATTACK_MODE_STRAIGHT) || (user_options->attack_mode == ATTACK_MODE_ASSOCIATION))
   {
@@ -778,8 +783,13 @@ char *status_get_guess_charset (const hashcat_ctx_t *hashcat_ctx)
   const char *custom_charset_2 = user_options->custom_charset_2;
   const char *custom_charset_3 = user_options->custom_charset_3;
   const char *custom_charset_4 = user_options->custom_charset_4;
+  const char *custom_charset_5 = user_options->custom_charset_5;
+  const char *custom_charset_6 = user_options->custom_charset_6;
+  const char *custom_charset_7 = user_options->custom_charset_7;
+  const char *custom_charset_8 = user_options->custom_charset_8;
+  const char *custom_charset_9 = user_options->custom_charset_9;
 
-  if ((custom_charset_1 != NULL) || (custom_charset_2 != NULL) || (custom_charset_3 != NULL) || (custom_charset_4 != NULL))
+  if ((custom_charset_1 != NULL) || (custom_charset_2 != NULL) || (custom_charset_3 != NULL) || (custom_charset_4 != NULL) || (custom_charset_5 != NULL) || (custom_charset_6 != NULL) || (custom_charset_7 != NULL) || (custom_charset_8 != NULL) || (custom_charset_9 != NULL))
   {
     char *tmp_buf;
 
@@ -787,8 +797,13 @@ char *status_get_guess_charset (const hashcat_ctx_t *hashcat_ctx)
     if (custom_charset_2 == NULL) custom_charset_2 = "Undefined";
     if (custom_charset_3 == NULL) custom_charset_3 = "Undefined";
     if (custom_charset_4 == NULL) custom_charset_4 = "Undefined";
+    if (custom_charset_5 == NULL) custom_charset_5 = "Undefined";
+    if (custom_charset_6 == NULL) custom_charset_6 = "Undefined";
+    if (custom_charset_7 == NULL) custom_charset_7 = "Undefined";
+    if (custom_charset_8 == NULL) custom_charset_8 = "Undefined";
+    if (custom_charset_9 == NULL) custom_charset_9 = "Undefined";
 
-    hc_asprintf (&tmp_buf, "-1 %s, -2 %s, -3 %s, -4 %s", custom_charset_1, custom_charset_2, custom_charset_3, custom_charset_4);
+    hc_asprintf (&tmp_buf, "-1 %s, -2 %s, -3 %s, -4 %s, -5 %s, -6 %s, -7 %s, -8 %s, -9 %s", custom_charset_1, custom_charset_2, custom_charset_3, custom_charset_4, custom_charset_5, custom_charset_6, custom_charset_7, custom_charset_8, custom_charset_9);
 
     return tmp_buf;
   }
diff --git a/src/usage.c b/src/usage.c
index c79a8c6ac..6de2fab85 100644
--- a/src/usage.c
+++ b/src/usage.c
@@ -128,6 +128,11 @@ static const char *const USAGE_BIG_PRE_HASHMODES[] =
   " -2, --custom-charset2          | CS   | User-defined charset ?2                              | -2 ?l?d?s",
   " -3, --custom-charset3          | CS   | User-defined charset ?3                              |",
   " -4, --custom-charset4          | CS   | User-defined charset ?4                              |",
+  " -5, --custom-charset5          | CS   | User-defined charset ?5                              |",
+  " -6, --custom-charset6          | CS   | User-defined charset ?6                              |",
+  " -7, --custom-charset7          | CS   | User-defined charset ?7                              |",
+  " -8, --custom-charset8          | CS   | User-defined charset ?8                              |",
+  " -9, --custom-charset9          | CS   | User-defined charset ?9                              |",
   "     --identify                 |      | Shows all supported algorithms for input hashes      | --identify my.hash",
   " -i, --increment                |      | Enable mask increment mode                           |",
   "     --increment-min            | Num  | Start mask incrementing at X                         | --increment-min=4",
diff --git a/src/user_options.c b/src/user_options.c
index a510c1d69..de84bd017 100644
--- a/src/user_options.c
+++ b/src/user_options.c
@@ -21,9 +21,9 @@
 #endif
 
 #ifdef WITH_BRAIN
-static const char *const short_options = "hVvm:a:r:j:k:g:o:t:d:D:n:u:T:c:p:s:l:1:2:3:4:iIbw:OMSY:z";
+static const char *const short_options = "hVvm:a:r:j:k:g:o:t:d:D:n:u:T:c:p:s:l:1:2:3:4:5:6:7:8:9:iIbw:OMSY:z";
 #else
-static const char *const short_options = "hVvm:a:r:j:k:g:o:t:d:D:n:u:T:c:p:s:l:1:2:3:4:iIbw:OMSY:";
+static const char *const short_options = "hVvm:a:r:j:k:g:o:t:d:D:n:u:T:c:p:s:l:1:2:3:4:5:6:7:8:9:iIbw:OMSY:";
 #endif
 
 static char *const SEPARATOR = ":";
@@ -51,6 +51,11 @@ static const struct option long_options[] =
   {"custom-charset2",           required_argument, NULL, IDX_CUSTOM_CHARSET_2},
   {"custom-charset3",           required_argument, NULL, IDX_CUSTOM_CHARSET_3},
   {"custom-charset4",           required_argument, NULL, IDX_CUSTOM_CHARSET_4},
+  {"custom-charset5",           required_argument, NULL, IDX_CUSTOM_CHARSET_5},
+  {"custom-charset6",           required_argument, NULL, IDX_CUSTOM_CHARSET_6},
+  {"custom-charset7",           required_argument, NULL, IDX_CUSTOM_CHARSET_7},
+  {"custom-charset8",           required_argument, NULL, IDX_CUSTOM_CHARSET_8},
+  {"custom-charset9",           required_argument, NULL, IDX_CUSTOM_CHARSET_9},
   {"debug-file",                required_argument, NULL, IDX_DEBUG_FILE},
   {"debug-mode",                required_argument, NULL, IDX_DEBUG_MODE},
   {"deprecated-check-disable",  no_argument,       NULL, IDX_DEPRECATED_CHECK_DISABLE},
@@ -164,6 +169,11 @@ static const char *const RULE_BUF_L = ":";
 static const char *const DEF_MASK_CS_1 = "?l?d?u";
 static const char *const DEF_MASK_CS_2 = "?l?d";
 static const char *const DEF_MASK_CS_3 = "?l?d*!$@_";
+static const char *const DEF_MASK_CS_4 = "a";
+static const char *const DEF_MASK_CS_5 = "b";
+static const char *const DEF_MASK_CS_6 = "c";
+static const char *const DEF_MASK_CS_7 = "d";
+static const char *const DEF_MASK_CS_8 = "e";
 
 int user_options_init (hashcat_ctx_t *hashcat_ctx)
 {
@@ -201,6 +211,11 @@ int user_options_init (hashcat_ctx_t *hashcat_ctx)
   user_options->custom_charset_2          = NULL;
   user_options->custom_charset_3          = NULL;
   user_options->custom_charset_4          = NULL;
+  user_options->custom_charset_5          = NULL;
+  user_options->custom_charset_6          = NULL;
+  user_options->custom_charset_7          = NULL;
+  user_options->custom_charset_8          = NULL;
+  user_options->custom_charset_9          = NULL;
   user_options->debug_file                = NULL;
   user_options->debug_mode                = DEBUG_MODE;
   user_options->deprecated_check_disable  = DEPRECATED_CHECK_DISABLE;
@@ -523,6 +538,11 @@ int user_options_getopt (hashcat_ctx_t *hashcat_ctx, int argc, char **argv)
       case IDX_CUSTOM_CHARSET_2:          user_options->custom_charset_2          = optarg;                          break;
       case IDX_CUSTOM_CHARSET_3:          user_options->custom_charset_3          = optarg;                          break;
       case IDX_CUSTOM_CHARSET_4:          user_options->custom_charset_4          = optarg;                          break;
+      case IDX_CUSTOM_CHARSET_5:          user_options->custom_charset_5          = optarg;                          break;
+      case IDX_CUSTOM_CHARSET_6:          user_options->custom_charset_6          = optarg;                          break;
+      case IDX_CUSTOM_CHARSET_7:          user_options->custom_charset_7          = optarg;                          break;
+      case IDX_CUSTOM_CHARSET_8:          user_options->custom_charset_8          = optarg;                          break;
+      case IDX_CUSTOM_CHARSET_9:          user_options->custom_charset_9          = optarg;                          break;
       case IDX_SLOW_CANDIDATES:           user_options->slow_candidates           = true;                            break;
       #ifdef WITH_BRAIN
       case IDX_BRAIN_CLIENT:              user_options->brain_client              = true;                            break;
@@ -1313,7 +1333,12 @@ int user_options_sanity (hashcat_ctx_t *hashcat_ctx)
     if ((user_options->custom_charset_1 != NULL)
      || (user_options->custom_charset_2 != NULL)
      || (user_options->custom_charset_3 != NULL)
-     || (user_options->custom_charset_4 != NULL))
+     || (user_options->custom_charset_4 != NULL)
+     || (user_options->custom_charset_5 != NULL)
+     || (user_options->custom_charset_6 != NULL)
+     || (user_options->custom_charset_7 != NULL)
+     || (user_options->custom_charset_8 != NULL)
+     || (user_options->custom_charset_9 != NULL))
     {
       if ((user_options->attack_mode == ATTACK_MODE_STRAIGHT) || (user_options->attack_mode == ATTACK_MODE_ASSOCIATION))
       {
@@ -1465,7 +1490,12 @@ int user_options_sanity (hashcat_ctx_t *hashcat_ctx)
   if ((user_options->custom_charset_1 != NULL)
    || (user_options->custom_charset_2 != NULL)
    || (user_options->custom_charset_3 != NULL)
-   || (user_options->custom_charset_4 != NULL))
+   || (user_options->custom_charset_4 != NULL)
+   || (user_options->custom_charset_5 != NULL)
+   || (user_options->custom_charset_6 != NULL)
+   || (user_options->custom_charset_7 != NULL)
+   || (user_options->custom_charset_8 != NULL)
+   || (user_options->custom_charset_9 != NULL))
   {
     if (user_options->attack_mode == ATTACK_MODE_STRAIGHT)
     {
@@ -2009,6 +2039,11 @@ void user_options_preprocess (hashcat_ctx_t *hashcat_ctx)
         user_options->custom_charset_1 = DEF_MASK_CS_1;
         user_options->custom_charset_2 = DEF_MASK_CS_2;
         user_options->custom_charset_3 = DEF_MASK_CS_3;
+        user_options->custom_charset_4 = DEF_MASK_CS_4;
+        user_options->custom_charset_5 = DEF_MASK_CS_5;
+        user_options->custom_charset_6 = DEF_MASK_CS_6;
+        user_options->custom_charset_7 = DEF_MASK_CS_7;
+        user_options->custom_charset_8 = DEF_MASK_CS_8;
 
         user_options->increment = true;
       }
@@ -2020,6 +2055,11 @@ void user_options_preprocess (hashcat_ctx_t *hashcat_ctx)
         user_options->custom_charset_1 = DEF_MASK_CS_1;
         user_options->custom_charset_2 = DEF_MASK_CS_2;
         user_options->custom_charset_3 = DEF_MASK_CS_3;
+        user_options->custom_charset_4 = DEF_MASK_CS_4;
+        user_options->custom_charset_5 = DEF_MASK_CS_5;
+        user_options->custom_charset_6 = DEF_MASK_CS_6;
+        user_options->custom_charset_7 = DEF_MASK_CS_7;
+        user_options->custom_charset_8 = DEF_MASK_CS_8;
 
         user_options->increment = true;
       }
@@ -2031,6 +2071,11 @@ void user_options_preprocess (hashcat_ctx_t *hashcat_ctx)
         user_options->custom_charset_1 = DEF_MASK_CS_1;
         user_options->custom_charset_2 = DEF_MASK_CS_2;
         user_options->custom_charset_3 = DEF_MASK_CS_3;
+        user_options->custom_charset_4 = DEF_MASK_CS_4;
+        user_options->custom_charset_5 = DEF_MASK_CS_5;
+        user_options->custom_charset_6 = DEF_MASK_CS_6;
+        user_options->custom_charset_7 = DEF_MASK_CS_7;
+        user_options->custom_charset_8 = DEF_MASK_CS_8;
 
         user_options->increment = true;
       }
@@ -3167,6 +3212,11 @@ void user_options_logger (hashcat_ctx_t *hashcat_ctx)
   logfile_top_string (user_options->custom_charset_2);
   logfile_top_string (user_options->custom_charset_3);
   logfile_top_string (user_options->custom_charset_4);
+  logfile_top_string (user_options->custom_charset_5);
+  logfile_top_string (user_options->custom_charset_6);
+  logfile_top_string (user_options->custom_charset_7);
+  logfile_top_string (user_options->custom_charset_8);
+  logfile_top_string (user_options->custom_charset_9);
   logfile_top_string (user_options->debug_file);
   logfile_top_string (user_options->encoding_from);
   logfile_top_string (user_options->encoding_to);

From b437459e5ba0e7d3428a59cd1460050e90f347af Mon Sep 17 00:00:00 2001
From: Flaggx1 <bigbot@gmail.com>
Date: Sun, 21 May 2023 02:10:14 -0400
Subject: [PATCH 02/83] Rename "Undefined" to "Undef" in status output for
 unused user charsets

---
 src/status.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/status.c b/src/status.c
index ccdf520f2..4c973bdab 100644
--- a/src/status.c
+++ b/src/status.c
@@ -793,15 +793,15 @@ char *status_get_guess_charset (const hashcat_ctx_t *hashcat_ctx)
   {
     char *tmp_buf;
 
-    if (custom_charset_1 == NULL) custom_charset_1 = "Undefined";
-    if (custom_charset_2 == NULL) custom_charset_2 = "Undefined";
-    if (custom_charset_3 == NULL) custom_charset_3 = "Undefined";
-    if (custom_charset_4 == NULL) custom_charset_4 = "Undefined";
-    if (custom_charset_5 == NULL) custom_charset_5 = "Undefined";
-    if (custom_charset_6 == NULL) custom_charset_6 = "Undefined";
-    if (custom_charset_7 == NULL) custom_charset_7 = "Undefined";
-    if (custom_charset_8 == NULL) custom_charset_8 = "Undefined";
-    if (custom_charset_9 == NULL) custom_charset_9 = "Undefined";
+    if (custom_charset_1 == NULL) custom_charset_1 = "Undef";
+    if (custom_charset_2 == NULL) custom_charset_2 = "Undef";
+    if (custom_charset_3 == NULL) custom_charset_3 = "Undef";
+    if (custom_charset_4 == NULL) custom_charset_4 = "Undef";
+    if (custom_charset_5 == NULL) custom_charset_5 = "Undef";
+    if (custom_charset_6 == NULL) custom_charset_6 = "Undef";
+    if (custom_charset_7 == NULL) custom_charset_7 = "Undef";
+    if (custom_charset_8 == NULL) custom_charset_8 = "Undef";
+    if (custom_charset_9 == NULL) custom_charset_9 = "Undef";
 
     hc_asprintf (&tmp_buf, "-1 %s, -2 %s, -3 %s, -4 %s, -5 %s, -6 %s, -7 %s, -8 %s, -9 %s", custom_charset_1, custom_charset_2, custom_charset_3, custom_charset_4, custom_charset_5, custom_charset_6, custom_charset_7, custom_charset_8, custom_charset_9);
 

From d299b2833eb80f56e555efbdf486d3f48db51c9b Mon Sep 17 00:00:00 2001
From: Flaggx1 <bigbot@gmail.com>
Date: Thu, 25 Jul 2024 07:07:27 -0400
Subject: [PATCH 03/83] Reduce user-defined charsets to 8. Remove unnecessary
 code.

---
 include/types.h    |  2 --
 src/brain.c        | 14 --------------
 src/mpsp.c         | 19 ++-----------------
 src/status.c       | 23 ++++++++++-------------
 src/usage.c        |  1 -
 src/user_options.c | 34 ++++------------------------------
 6 files changed, 16 insertions(+), 77 deletions(-)

diff --git a/include/types.h b/include/types.h
index 02b83da2b..961cde915 100644
--- a/include/types.h
+++ b/include/types.h
@@ -759,7 +759,6 @@ typedef enum user_options_map
   IDX_CUSTOM_CHARSET_6          = '6',
   IDX_CUSTOM_CHARSET_7          = '7',
   IDX_CUSTOM_CHARSET_8          = '8',
-  IDX_CUSTOM_CHARSET_9          = '9',
   IDX_DEBUG_FILE                = 0xff12,
   IDX_DEBUG_MODE                = 0xff13,
   IDX_DEPRECATED_CHECK_DISABLE  = 0xff14,
@@ -2396,7 +2395,6 @@ typedef struct user_options
   char        *brain_session_whitelist;
   #endif
   char        *cpu_affinity;
-  char        *custom_charset_9;
   char        *debug_file;
   char        *induction_dir;
   char        *keyboard_layout_mapping;
diff --git a/src/brain.c b/src/brain.c
index 646a334d3..f313b5536 100644
--- a/src/brain.c
+++ b/src/brain.c
@@ -373,13 +373,6 @@ u32 brain_compute_attack (hashcat_ctx_t *hashcat_ctx)
 
       XXH64_update (state, custom_charset_8, strlen (custom_charset_8));
     }
-
-    if (user_options->custom_charset_9)
-    {
-      const char *custom_charset_9 = user_options->custom_charset_9;
-
-      XXH64_update (state, custom_charset_9, strlen (custom_charset_9));
-    }
   }
 
   else if (user_options->attack_mode == ATTACK_MODE_HYBRID1)
@@ -469,13 +462,6 @@ u32 brain_compute_attack (hashcat_ctx_t *hashcat_ctx)
       XXH64_update (state, custom_charset_8, strlen (custom_charset_8));
     }
 
-    if (user_options->custom_charset_9)
-    {
-      const char *custom_charset_9 = user_options->custom_charset_9;
-
-      XXH64_update (state, custom_charset_9, strlen (custom_charset_9));
-    }
-
     const int hex_wordlist = user_options->hex_wordlist;
 
     XXH64_update (state, &hex_wordlist, sizeof (hex_wordlist));
diff --git a/src/mpsp.c b/src/mpsp.c
index d694dc242..02d5b7f45 100644
--- a/src/mpsp.c
+++ b/src/mpsp.c
@@ -18,7 +18,7 @@
 
 static const char *const DEF_MASK = "?1?2?2?2?2?2?2?3?3?3?3?d?d?d?d";
 
-#define MAX_MFS 10 // 9*charset, 1*mask
+#define MAX_MFS 9 // 8*charset, 1*mask
 
 static int sp_comp_val (const void *p1, const void *p2)
 {
@@ -326,9 +326,6 @@ static int mp_expand (hashcat_ctx_t *hashcat_ctx, const char *in_buf, size_t in_
         case '8': if (mp_usr[7].cs_len == 0) { event_log_error (hashcat_ctx, "Custom-charset 8 is undefined."); return -1; }
                   rc = mp_add_cs_buf (hashcat_ctx, mp_usr[7].cs_buf, mp_usr[7].cs_len, mp_usr, mp_usr_offset);
                   break;
-        case '9': if (mp_usr[8].cs_len == 0) { event_log_error (hashcat_ctx, "Custom-charset 9 is undefined."); return -1; }
-                  rc = mp_add_cs_buf (hashcat_ctx, mp_usr[8].cs_buf, mp_usr[8].cs_len, mp_usr, mp_usr_offset);
-                  break;
         case '?': rc = mp_add_cs_buf (hashcat_ctx, &p0, 1, mp_usr, mp_usr_offset);
                   break;
         default:  event_log_error (hashcat_ctx, "Syntax error in mask: %s", in_buf);
@@ -454,9 +451,6 @@ static int mp_gen_css (hashcat_ctx_t *hashcat_ctx, char *mask_buf, size_t mask_l
         case '8': if (mp_usr[7].cs_len == 0) { event_log_error (hashcat_ctx, "Custom-charset 8 is undefined."); return -1; }
                   rc = mp_add_cs_buf (hashcat_ctx, mp_usr[7].cs_buf, mp_usr[7].cs_len, css_buf, css_pos);
                   break;
-        case '9': if (mp_usr[8].cs_len == 0) { event_log_error (hashcat_ctx, "Custom-charset 9 is undefined."); return -1; }
-                  rc = mp_add_cs_buf (hashcat_ctx, mp_usr[8].cs_buf, mp_usr[8].cs_len, css_buf, css_pos);
-                  break;
         case '?': rc = mp_add_cs_buf (hashcat_ctx, &chr, 1, css_buf, css_pos);
                   break;
         default:  event_log_error (hashcat_ctx, "Syntax error in mask: %s", mask_buf);
@@ -1446,7 +1440,7 @@ int mask_ctx_init (hashcat_ctx_t *hashcat_ctx)
   mask_ctx->enabled = true;
 
   mask_ctx->mp_sys  = (cs_t *) hccalloc (8, sizeof (cs_t));
-  mask_ctx->mp_usr  = (cs_t *) hccalloc (9, sizeof (cs_t));
+  mask_ctx->mp_usr  = (cs_t *) hccalloc (8, sizeof (cs_t));
 
   mask_ctx->css_buf = (cs_t *) hccalloc (256, sizeof (cs_t));
   mask_ctx->css_cnt = 0;
@@ -1477,7 +1471,6 @@ int mask_ctx_init (hashcat_ctx_t *hashcat_ctx)
   if (user_options->custom_charset_6) { if (mp_setup_usr (hashcat_ctx, mask_ctx->mp_sys, mask_ctx->mp_usr, user_options->custom_charset_6, 5) == -1) return -1; }
   if (user_options->custom_charset_7) { if (mp_setup_usr (hashcat_ctx, mask_ctx->mp_sys, mask_ctx->mp_usr, user_options->custom_charset_7, 6) == -1) return -1; }
   if (user_options->custom_charset_8) { if (mp_setup_usr (hashcat_ctx, mask_ctx->mp_sys, mask_ctx->mp_usr, user_options->custom_charset_8, 7) == -1) return -1; }
-  if (user_options->custom_charset_9) { if (mp_setup_usr (hashcat_ctx, mask_ctx->mp_sys, mask_ctx->mp_usr, user_options->custom_charset_9, 8) == -1) return -1; }
 
   if (user_options->benchmark == true)
   {
@@ -1777,7 +1770,6 @@ int mask_ctx_parse_maskfile (hashcat_ctx_t *hashcat_ctx)
   mfs_buf[5].mf_len = 0;
   mfs_buf[6].mf_len = 0;
   mfs_buf[7].mf_len = 0;
-  mfs_buf[8].mf_len = 0;
 
   size_t mfs_cnt = 0;
 
@@ -1839,7 +1831,6 @@ int mask_ctx_parse_maskfile (hashcat_ctx_t *hashcat_ctx)
   user_options->custom_charset_6 = NULL;
   user_options->custom_charset_7 = NULL;
   user_options->custom_charset_8 = NULL;
-  user_options->custom_charset_9 = NULL;
 
   mp_reset_usr (mask_ctx->mp_usr, 0);
   mp_reset_usr (mask_ctx->mp_usr, 1);
@@ -1849,7 +1840,6 @@ int mask_ctx_parse_maskfile (hashcat_ctx_t *hashcat_ctx)
   mp_reset_usr (mask_ctx->mp_usr, 5);
   mp_reset_usr (mask_ctx->mp_usr, 6);
   mp_reset_usr (mask_ctx->mp_usr, 7);
-  mp_reset_usr (mask_ctx->mp_usr, 8);
 
   for (size_t i = 0; i < mfs_cnt; i++)
   {
@@ -1894,11 +1884,6 @@ int mask_ctx_parse_maskfile (hashcat_ctx_t *hashcat_ctx)
         user_options->custom_charset_8 = mfs_buf[7].mf_buf;
         mp_setup_usr (hashcat_ctx, mask_ctx->mp_sys, mask_ctx->mp_usr, user_options->custom_charset_8, 7);
         break;
-
-      case 8:
-        user_options->custom_charset_9 = mfs_buf[8].mf_buf;
-        mp_setup_usr (hashcat_ctx, mask_ctx->mp_sys, mask_ctx->mp_usr, user_options->custom_charset_9, 8);
-        break;
     }
   }
 
diff --git a/src/status.c b/src/status.c
index 4c973bdab..ac661acd4 100644
--- a/src/status.c
+++ b/src/status.c
@@ -398,7 +398,6 @@ int status_get_guess_mode (const hashcat_ctx_t *hashcat_ctx)
   if (user_options->custom_charset_6) has_mask_cs = true;
   if (user_options->custom_charset_7) has_mask_cs = true;
   if (user_options->custom_charset_8) has_mask_cs = true;
-  if (user_options->custom_charset_9) has_mask_cs = true;
 
   if ((user_options->attack_mode == ATTACK_MODE_STRAIGHT) || (user_options->attack_mode == ATTACK_MODE_ASSOCIATION))
   {
@@ -787,23 +786,21 @@ char *status_get_guess_charset (const hashcat_ctx_t *hashcat_ctx)
   const char *custom_charset_6 = user_options->custom_charset_6;
   const char *custom_charset_7 = user_options->custom_charset_7;
   const char *custom_charset_8 = user_options->custom_charset_8;
-  const char *custom_charset_9 = user_options->custom_charset_9;
 
-  if ((custom_charset_1 != NULL) || (custom_charset_2 != NULL) || (custom_charset_3 != NULL) || (custom_charset_4 != NULL) || (custom_charset_5 != NULL) || (custom_charset_6 != NULL) || (custom_charset_7 != NULL) || (custom_charset_8 != NULL) || (custom_charset_9 != NULL))
+  if ((custom_charset_1 != NULL) || (custom_charset_2 != NULL) || (custom_charset_3 != NULL) || (custom_charset_4 != NULL) || (custom_charset_5 != NULL) || (custom_charset_6 != NULL) || (custom_charset_7 != NULL) || (custom_charset_8 != NULL));
   {
     char *tmp_buf;
 
-    if (custom_charset_1 == NULL) custom_charset_1 = "Undef";
-    if (custom_charset_2 == NULL) custom_charset_2 = "Undef";
-    if (custom_charset_3 == NULL) custom_charset_3 = "Undef";
-    if (custom_charset_4 == NULL) custom_charset_4 = "Undef";
-    if (custom_charset_5 == NULL) custom_charset_5 = "Undef";
-    if (custom_charset_6 == NULL) custom_charset_6 = "Undef";
-    if (custom_charset_7 == NULL) custom_charset_7 = "Undef";
-    if (custom_charset_8 == NULL) custom_charset_8 = "Undef";
-    if (custom_charset_9 == NULL) custom_charset_9 = "Undef";
+    if (custom_charset_1 == NULL) custom_charset_1 = "N/A";
+    if (custom_charset_2 == NULL) custom_charset_2 = "N/A";
+    if (custom_charset_3 == NULL) custom_charset_3 = "N/A";
+    if (custom_charset_4 == NULL) custom_charset_4 = "N/A";
+    if (custom_charset_5 == NULL) custom_charset_5 = "N/A";
+    if (custom_charset_6 == NULL) custom_charset_6 = "N/A";
+    if (custom_charset_7 == NULL) custom_charset_7 = "N/A";
+    if (custom_charset_8 == NULL) custom_charset_8 = "N/A";
 
-    hc_asprintf (&tmp_buf, "-1 %s, -2 %s, -3 %s, -4 %s, -5 %s, -6 %s, -7 %s, -8 %s, -9 %s", custom_charset_1, custom_charset_2, custom_charset_3, custom_charset_4, custom_charset_5, custom_charset_6, custom_charset_7, custom_charset_8, custom_charset_9);
+    hc_asprintf (&tmp_buf, "-1 %s, -2 %s, -3 %s, -4 %s, -5 %s, -6 %s, -7 %s, -8 %s", custom_charset_1, custom_charset_2, custom_charset_3, custom_charset_4, custom_charset_5, custom_charset_6, custom_charset_7, custom_charset_8);
 
     return tmp_buf;
   }
diff --git a/src/usage.c b/src/usage.c
index 0b735fe0d..51f658981 100644
--- a/src/usage.c
+++ b/src/usage.c
@@ -134,7 +134,6 @@ static const char *const USAGE_BIG_PRE_HASHMODES[] =
   " -6, --custom-charset6          | CS   | User-defined charset ?6                              |",
   " -7, --custom-charset7          | CS   | User-defined charset ?7                              |",
   " -8, --custom-charset8          | CS   | User-defined charset ?8                              |",
-  " -9, --custom-charset9          | CS   | User-defined charset ?9                              |",
   "     --identify                 |      | Shows all supported algorithms for input hashes      | --identify my.hash",
   " -i, --increment                |      | Enable mask increment mode                           |",
   "     --increment-min            | Num  | Start mask incrementing at X                         | --increment-min=4",
diff --git a/src/user_options.c b/src/user_options.c
index 5093301e9..427351939 100644
--- a/src/user_options.c
+++ b/src/user_options.c
@@ -21,9 +21,9 @@
 #endif
 
 #ifdef WITH_BRAIN
-static const char *const short_options = "hVvm:a:r:j:k:g:o:t:d:D:n:u:T:c:p:s:l:1:2:3:4:5:6:7:8:9:iIbw:OMSY:z";
+static const char *const short_options = "hVvm:a:r:j:k:g:o:t:d:D:n:u:T:c:p:s:l:1:2:3:4:5:6:7:8:iIbw:OMSY:z";
 #else
-static const char *const short_options = "hVvm:a:r:j:k:g:o:t:d:D:n:u:T:c:p:s:l:1:2:3:4:5:6:7:8:9:iIbw:OMSY:";
+static const char *const short_options = "hVvm:a:r:j:k:g:o:t:d:D:n:u:T:c:p:s:l:1:2:3:4:5:6:7:8:iIbw:OMSY:";
 #endif
 
 static char *const SEPARATOR = ":";
@@ -55,7 +55,6 @@ static const struct option long_options[] =
   {"custom-charset6",           required_argument, NULL, IDX_CUSTOM_CHARSET_6},
   {"custom-charset7",           required_argument, NULL, IDX_CUSTOM_CHARSET_7},
   {"custom-charset8",           required_argument, NULL, IDX_CUSTOM_CHARSET_8},
-  {"custom-charset9",           required_argument, NULL, IDX_CUSTOM_CHARSET_9},
   {"debug-file",                required_argument, NULL, IDX_DEBUG_FILE},
   {"debug-mode",                required_argument, NULL, IDX_DEBUG_MODE},
   {"deprecated-check-disable",  no_argument,       NULL, IDX_DEPRECATED_CHECK_DISABLE},
@@ -171,11 +170,6 @@ static const char *const RULE_BUF_L = ":";
 static const char *const DEF_MASK_CS_1 = "?l?d?u";
 static const char *const DEF_MASK_CS_2 = "?l?d";
 static const char *const DEF_MASK_CS_3 = "?l?d*!$@_";
-static const char *const DEF_MASK_CS_4 = "a";
-static const char *const DEF_MASK_CS_5 = "b";
-static const char *const DEF_MASK_CS_6 = "c";
-static const char *const DEF_MASK_CS_7 = "d";
-static const char *const DEF_MASK_CS_8 = "e";
 
 int user_options_init (hashcat_ctx_t *hashcat_ctx)
 {
@@ -217,7 +211,6 @@ int user_options_init (hashcat_ctx_t *hashcat_ctx)
   user_options->custom_charset_6          = NULL;
   user_options->custom_charset_7          = NULL;
   user_options->custom_charset_8          = NULL;
-  user_options->custom_charset_9          = NULL;
   user_options->debug_file                = NULL;
   user_options->debug_mode                = DEBUG_MODE;
   user_options->deprecated_check          = DEPRECATED_CHECK;
@@ -551,7 +544,6 @@ int user_options_getopt (hashcat_ctx_t *hashcat_ctx, int argc, char **argv)
       case IDX_CUSTOM_CHARSET_6:          user_options->custom_charset_6          = optarg;                          break;
       case IDX_CUSTOM_CHARSET_7:          user_options->custom_charset_7          = optarg;                          break;
       case IDX_CUSTOM_CHARSET_8:          user_options->custom_charset_8          = optarg;                          break;
-      case IDX_CUSTOM_CHARSET_9:          user_options->custom_charset_9          = optarg;                          break;
       case IDX_SLOW_CANDIDATES:           user_options->slow_candidates           = true;                            break;
       #ifdef WITH_BRAIN
       case IDX_BRAIN_CLIENT:              user_options->brain_client              = true;                            break;
@@ -1353,8 +1345,7 @@ int user_options_sanity (hashcat_ctx_t *hashcat_ctx)
      || (user_options->custom_charset_5 != NULL)
      || (user_options->custom_charset_6 != NULL)
      || (user_options->custom_charset_7 != NULL)
-     || (user_options->custom_charset_8 != NULL)
-     || (user_options->custom_charset_9 != NULL))
+     || (user_options->custom_charset_8 != NULL))
     {
       if ((user_options->attack_mode == ATTACK_MODE_STRAIGHT) || (user_options->attack_mode == ATTACK_MODE_ASSOCIATION))
       {
@@ -1526,8 +1517,7 @@ int user_options_sanity (hashcat_ctx_t *hashcat_ctx)
    || (user_options->custom_charset_5 != NULL)
    || (user_options->custom_charset_6 != NULL)
    || (user_options->custom_charset_7 != NULL)
-   || (user_options->custom_charset_8 != NULL)
-   || (user_options->custom_charset_9 != NULL))
+   || (user_options->custom_charset_8 != NULL))
   {
     if (user_options->attack_mode == ATTACK_MODE_STRAIGHT)
     {
@@ -2071,11 +2061,6 @@ void user_options_preprocess (hashcat_ctx_t *hashcat_ctx)
         user_options->custom_charset_1 = DEF_MASK_CS_1;
         user_options->custom_charset_2 = DEF_MASK_CS_2;
         user_options->custom_charset_3 = DEF_MASK_CS_3;
-        user_options->custom_charset_4 = DEF_MASK_CS_4;
-        user_options->custom_charset_5 = DEF_MASK_CS_5;
-        user_options->custom_charset_6 = DEF_MASK_CS_6;
-        user_options->custom_charset_7 = DEF_MASK_CS_7;
-        user_options->custom_charset_8 = DEF_MASK_CS_8;
 
         user_options->increment = true;
       }
@@ -2087,11 +2072,6 @@ void user_options_preprocess (hashcat_ctx_t *hashcat_ctx)
         user_options->custom_charset_1 = DEF_MASK_CS_1;
         user_options->custom_charset_2 = DEF_MASK_CS_2;
         user_options->custom_charset_3 = DEF_MASK_CS_3;
-        user_options->custom_charset_4 = DEF_MASK_CS_4;
-        user_options->custom_charset_5 = DEF_MASK_CS_5;
-        user_options->custom_charset_6 = DEF_MASK_CS_6;
-        user_options->custom_charset_7 = DEF_MASK_CS_7;
-        user_options->custom_charset_8 = DEF_MASK_CS_8;
 
         user_options->increment = true;
       }
@@ -2103,11 +2083,6 @@ void user_options_preprocess (hashcat_ctx_t *hashcat_ctx)
         user_options->custom_charset_1 = DEF_MASK_CS_1;
         user_options->custom_charset_2 = DEF_MASK_CS_2;
         user_options->custom_charset_3 = DEF_MASK_CS_3;
-        user_options->custom_charset_4 = DEF_MASK_CS_4;
-        user_options->custom_charset_5 = DEF_MASK_CS_5;
-        user_options->custom_charset_6 = DEF_MASK_CS_6;
-        user_options->custom_charset_7 = DEF_MASK_CS_7;
-        user_options->custom_charset_8 = DEF_MASK_CS_8;
 
         user_options->increment = true;
       }
@@ -3248,7 +3223,6 @@ void user_options_logger (hashcat_ctx_t *hashcat_ctx)
   logfile_top_string (user_options->custom_charset_6);
   logfile_top_string (user_options->custom_charset_7);
   logfile_top_string (user_options->custom_charset_8);
-  logfile_top_string (user_options->custom_charset_9);
   logfile_top_string (user_options->debug_file);
   logfile_top_string (user_options->encoding_from);
   logfile_top_string (user_options->encoding_to);

From 4e0fc2099eef1f2e367a4290fdc305f9edb710d1 Mon Sep 17 00:00:00 2001
From: Flaggx1 <bigbot@gmail.com>
Date: Thu, 25 Jul 2024 07:29:26 -0400
Subject: [PATCH 04/83] Removed incorrect semicolon and blank line

---
 src/brain.c  | 1 -
 src/status.c | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/brain.c b/src/brain.c
index f313b5536..2aec26300 100644
--- a/src/brain.c
+++ b/src/brain.c
@@ -374,7 +374,6 @@ u32 brain_compute_attack (hashcat_ctx_t *hashcat_ctx)
       XXH64_update (state, custom_charset_8, strlen (custom_charset_8));
     }
   }
-
   else if (user_options->attack_mode == ATTACK_MODE_HYBRID1)
   {
     const u64 wordlist_hash = brain_compute_attack_wordlist (straight_ctx->dict);
diff --git a/src/status.c b/src/status.c
index ac661acd4..b33fba85e 100644
--- a/src/status.c
+++ b/src/status.c
@@ -787,7 +787,7 @@ char *status_get_guess_charset (const hashcat_ctx_t *hashcat_ctx)
   const char *custom_charset_7 = user_options->custom_charset_7;
   const char *custom_charset_8 = user_options->custom_charset_8;
 
-  if ((custom_charset_1 != NULL) || (custom_charset_2 != NULL) || (custom_charset_3 != NULL) || (custom_charset_4 != NULL) || (custom_charset_5 != NULL) || (custom_charset_6 != NULL) || (custom_charset_7 != NULL) || (custom_charset_8 != NULL));
+  if ((custom_charset_1 != NULL) || (custom_charset_2 != NULL) || (custom_charset_3 != NULL) || (custom_charset_4 != NULL) || (custom_charset_5 != NULL) || (custom_charset_6 != NULL) || (custom_charset_7 != NULL) || (custom_charset_8 != NULL))
   {
     char *tmp_buf;
 

From 7398b03be30b45d1e89dd7b7ec7799ee5d5b0f52 Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Thu, 24 Oct 2024 20:15:08 +0200
Subject: [PATCH 05/83] User Options: limit --bitmap-max value to 31

---
 docs/changes.txt   | 1 +
 src/bitmap.c       | 2 ++
 src/usage.c        | 2 +-
 src/user_options.c | 7 +++++++
 4 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/docs/changes.txt b/docs/changes.txt
index 283e3c0d4..ef9be50bb 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -130,6 +130,7 @@
 - Metal Backend: allow use of devices with Metal if runtime version is >= 200
 - Metal Backend: disable Metal devices only if at least one OpenCL device is active
 - User Options: added --metal-compiler-runtime option
+- User Options: limit --bitmap-max value to 31
 - Hardware Monitor: avoid sprintf in src/ext_iokit.c
 - Help: show supported hash-modes only with -hh
 - Makefile: prevent make failure with Apple Silicon in case of partial rebuild
diff --git a/src/bitmap.c b/src/bitmap.c
index 367b16a7c..12f701fe5 100644
--- a/src/bitmap.c
+++ b/src/bitmap.c
@@ -110,6 +110,8 @@ int bitmap_ctx_init (hashcat_ctx_t *hashcat_ctx)
   u32 *bitmap_s2_c = (u32 *) hcmalloc ((1U << bitmap_max) * sizeof (u32));
   u32 *bitmap_s2_d = (u32 *) hcmalloc ((1U << bitmap_max) * sizeof (u32));
 
+  if (!bitmap_s1_a || !bitmap_s1_b || !bitmap_s1_c || !bitmap_s1_d || !bitmap_s2_a || !bitmap_s2_b || !bitmap_s2_c || !bitmap_s2_d) return -1;
+
   u32 bitmap_bits;
   u32 bitmap_nums;
   u32 bitmap_mask;
diff --git a/src/usage.c b/src/usage.c
index 7cbf22348..1bee7c547 100644
--- a/src/usage.c
+++ b/src/usage.c
@@ -91,7 +91,7 @@ static const char *const USAGE_BIG_PRE_HASHMODES[] =
   "     --progress-only            |      | Return ideal progress step size and time to process  |",
   " -c, --segment-size             | Num  | Sets size in MB to cache from the wordfile to X      | -c 32",
   "     --bitmap-min               | Num  | Sets minimum bits allowed for bitmaps to X           | --bitmap-min=24",
-  "     --bitmap-max               | Num  | Sets maximum bits allowed for bitmaps to X           | --bitmap-max=24",
+  "     --bitmap-max               | Num  | Sets maximum bits allowed for bitmaps to X (max: 31) | --bitmap-max=24",
   "     --cpu-affinity             | Str  | Locks to CPU devices, separated with commas          | --cpu-affinity=1,2,3",
   "     --hook-threads             | Num  | Sets number of threads for a hook (per compute unit) | --hook-threads=8",
   "     --hash-info                |      | Show information for each hash-mode                  |",
diff --git a/src/user_options.c b/src/user_options.c
index b1ed588f4..1bf346d13 100644
--- a/src/user_options.c
+++ b/src/user_options.c
@@ -945,6 +945,13 @@ int user_options_sanity (hashcat_ctx_t *hashcat_ctx)
     return -1;
   }
 
+  if (user_options->bitmap_max > 31)
+  {
+    event_log_error (hashcat_ctx, "Invalid --bitmap-max value specified - must be lower than 32.");
+
+    return -1;
+  }
+
   if (user_options->rp_gen_func_min > user_options->rp_gen_func_max)
   {
     event_log_error (hashcat_ctx, "Invalid --rp-gen-func-min value specified.");

From a66c93ae1e3b3f4b299a220322821dfa5599e6ee Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Thu, 24 Oct 2024 21:13:11 +0200
Subject: [PATCH 06/83] Fixed bug in --stdout when multiple computing devices
 are active

---
 docs/changes.txt |  1 +
 include/types.h  |  2 ++
 src/outfile.c    |  5 +++++
 src/stdout.c     | 11 +++++++++++
 4 files changed, 19 insertions(+)

diff --git a/docs/changes.txt b/docs/changes.txt
index 283e3c0d4..a4a17bfd8 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -82,6 +82,7 @@
 - Fixed bug in grep out-of-memory workaround on Unit Test
 - Fixed bug in input_tokenizer when TOKEN_ATTR_FIXED_LENGTH is used and refactor modules
 - Fixed bug in --stdout that caused certain rules to malfunction
+- Fixed bug in --stdout when multiple computing devices are active
 - Fixed build failed for 10700 optimized with Apple Metal
 - Fixed build failed for 13772 and 13773 with Apple Metal
 - Fixed build failed for 18400 with Apple Metal
diff --git a/include/types.h b/include/types.h
index e6ea946f9..6b508d7f0 100644
--- a/include/types.h
+++ b/include/types.h
@@ -2122,6 +2122,8 @@ typedef struct outfile_ctx
 
   char   *filename;
 
+  hc_thread_mutex_t mux_outfile;
+
 } outfile_ctx_t;
 
 typedef struct pot
diff --git a/src/outfile.c b/src/outfile.c
index b14b56a7a..43ddb8611 100644
--- a/src/outfile.c
+++ b/src/outfile.c
@@ -15,6 +15,7 @@
 #include "backend.h"
 #include "shared.h"
 #include "locking.h"
+#include "thread.h"
 #include "outfile.h"
 
 u32 outfile_format_parse (const char *format_string)
@@ -506,6 +507,8 @@ int outfile_init (hashcat_ctx_t *hashcat_ctx)
   outfile_ctx->outfile_json    = user_options->outfile_json;
   outfile_ctx->is_fifo         = hc_path_is_fifo (outfile_ctx->filename);
 
+  hc_thread_mutex_init (outfile_ctx->mux_outfile);
+
   return 0;
 }
 
@@ -513,6 +516,8 @@ void outfile_destroy (hashcat_ctx_t *hashcat_ctx)
 {
   outfile_ctx_t *outfile_ctx = hashcat_ctx->outfile_ctx;
 
+  hc_thread_mutex_delete (outfile_ctx->mux_outfile);
+
   if (outfile_ctx->is_fifo == true && outfile_ctx->fp.pfp != NULL)
   {
     hc_unlockfile (&outfile_ctx->fp);
diff --git a/src/stdout.c b/src/stdout.c
index 429836793..127232d0e 100644
--- a/src/stdout.c
+++ b/src/stdout.c
@@ -12,6 +12,7 @@
 #include "mpsp.h"
 #include "backend.h"
 #include "shared.h"
+#include "thread.h"
 #include "stdout.h"
 
 static void out_flush (out_t *out)
@@ -59,6 +60,10 @@ int process_stdout (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
   straight_ctx_t   *straight_ctx   = hashcat_ctx->straight_ctx;
   user_options_t   *user_options   = hashcat_ctx->user_options;
 
+  // prevent wrong candidates in output when backend_ctx->backend_devices_active > 1
+
+  hc_thread_mutex_lock (outfile_ctx->mux_outfile);
+
   char *filename = outfile_ctx->filename;
 
   out_t out;
@@ -69,6 +74,8 @@ int process_stdout (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
     {
       event_log_error (hashcat_ctx, "%s: %s", filename, strerror (errno));
 
+      hc_thread_mutex_unlock (outfile_ctx->mux_outfile);
+
       return -1;
     }
 
@@ -78,6 +85,8 @@ int process_stdout (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
       event_log_error (hashcat_ctx, "%s: %s", filename, strerror (errno));
 
+      hc_thread_mutex_unlock (outfile_ctx->mux_outfile);
+
       return -1;
     }
   }
@@ -341,5 +350,7 @@ int process_stdout (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
     hc_fclose (&out.fp);
   }
 
+  hc_thread_mutex_unlock (outfile_ctx->mux_outfile);
+
   return rc;
 }

From e3c097c88bca1a9395fdf91738e3b2aab314c1bc Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Sat, 26 Oct 2024 14:51:11 +0200
Subject: [PATCH 07/83] do not allow --show and --restore

---
 src/hashcat.c      | 2 ++
 src/user_options.c | 7 +++++++
 2 files changed, 9 insertions(+)

diff --git a/src/hashcat.c b/src/hashcat.c
index a6dd823d1..642a13952 100644
--- a/src/hashcat.c
+++ b/src/hashcat.c
@@ -1430,6 +1430,8 @@ bool autodetect_hashmode_test (hashcat_ctx_t *hashcat_ctx)
   {
     char *input_buf = user_options_extra->hc_hash;
 
+    if (!input_buf) return false;
+
     size_t input_len = strlen (input_buf);
 
     char  *hash_buf = NULL;
diff --git a/src/user_options.c b/src/user_options.c
index b1ed588f4..3576cb3d2 100644
--- a/src/user_options.c
+++ b/src/user_options.c
@@ -1059,6 +1059,13 @@ int user_options_sanity (hashcat_ctx_t *hashcat_ctx)
     return 0;
   }
 
+  if (user_options->show == true && user_options->restore == true)
+  {
+    event_log_error (hashcat_ctx, "Mixing --show and --restore is not allowed.");
+
+    return -1;
+  }
+
   if (user_options->show == true || user_options->left == true)
   {
     if (user_options->remove == true)

From b67638ab36224a3201cea67696b25ebc7f962dc9 Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Sat, 26 Oct 2024 15:11:45 +0200
Subject: [PATCH 08/83] do not allow --stdout and --slow-candidates

---
 src/user_options.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/user_options.c b/src/user_options.c
index b1ed588f4..f9bf491ea 100644
--- a/src/user_options.c
+++ b/src/user_options.c
@@ -1052,6 +1052,13 @@ int user_options_sanity (hashcat_ctx_t *hashcat_ctx)
     }
   }
 
+  if (user_options->stdout_flag == true && user_options->slow_candidates == true)
+  {
+    event_log_error (hashcat_ctx, "Slow candidates (-S) is not allowed in stdout mode.");
+
+    return -1;
+  }
+
   if ((user_options->show == true) && ((user_options->username == true) || (user_options->dynamic_x == true)))
   {
     event_log_error (hashcat_ctx, "Mixing --show with --username or --dynamic-x can cause exponential delay in output.");

From d93d208ad8174cded81346a64c9a34340188c0f0 Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Thu, 31 Oct 2024 18:43:35 +0100
Subject: [PATCH 09/83] Modules: Added module_unstable_warning for 22500,
 update module_unstable_warning for 10700

---
 docs/changes.txt           |  1 +
 src/modules/module_10700.c |  7 ++-----
 src/modules/module_22500.c | 13 ++++++++++++-
 3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/docs/changes.txt b/docs/changes.txt
index 283e3c0d4..37d53e24d 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -118,6 +118,7 @@
 - Building: Support building windows binaries on macOS using MinGW
 - Dependencies: Updated OpenCL-Headers to v2023.04.17
 - Documents: Updated BUILD.md and added BUILD_macOS.md (containing instructions for building windows binaries on macOS)
+- Modules: Added module_unstable_warning for 22500, update module_unstable_warning for 10700
 - Modules: Added support for non-zero IVs for -m 6800 (Lastpass). Also added `tools/lastpass2hashcat.py`
 - Open Document Format: Added support for small documents with content length < 1024
 - Status Code: Add specific return code for self-test fail (-11)
diff --git a/src/modules/module_10700.c b/src/modules/module_10700.c
index 05be37b65..a28e79829 100644
--- a/src/modules/module_10700.c
+++ b/src/modules/module_10700.c
@@ -84,13 +84,10 @@ static const int   ROUNDS_PDF17L8 = 64;
 
 bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
-  // AppleM1, OpenCL, MTLCompilerService, createKernel never-end with pure kernel
+  // AppleM1, OpenCL, MTLCompilerService, createKernel never-end with pure kernel and newComputePipelineState failed with optimized kernel
   if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU))
   {
-    if ((hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) == 0)
-    {
-      return true;
-    }
+    return true;
   }
 
   return false;
diff --git a/src/modules/module_22500.c b/src/modules/module_22500.c
index 534539b4e..9f47c6689 100644
--- a/src/modules/module_22500.c
+++ b/src/modules/module_22500.c
@@ -43,6 +43,17 @@ const char *module_st_pass        (MAYBE_UNUSED const hashconfig_t *hashconfig,
 
 static const char *SIGNATURE_MULTIBIT = "$multibit$";
 
+bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
+{
+  // AppleM1, OpenCL, MTLCompilerService never-end
+  if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU))
+  {
+    return true;
+  }
+
+  return false;
+}
+
 u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const bool optimized_kernel = (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL);
@@ -228,6 +239,6 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_st_hash                  = module_st_hash;
   module_ctx->module_st_pass                  = module_st_pass;
   module_ctx->module_tmp_size                 = MODULE_DEFAULT;
-  module_ctx->module_unstable_warning         = MODULE_DEFAULT;
+  module_ctx->module_unstable_warning         = module_unstable_warning;
   module_ctx->module_warmup_disable           = MODULE_DEFAULT;
 }

From 3e10c363dedbe3d1630c607e79287a2df919b12b Mon Sep 17 00:00:00 2001
From: PenguinKeeper7 <jones-adam@live.co.uk>
Date: Tue, 26 Nov 2024 00:08:26 +0000
Subject: [PATCH 10/83] Check additional blocks for safety

---
 OpenCL/m26610-pure.cl | 33 ++++++++++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/OpenCL/m26610-pure.cl b/OpenCL/m26610-pure.cl
index abd476395..cb02840f6 100644
--- a/OpenCL/m26610-pure.cl
+++ b/OpenCL/m26610-pure.cl
@@ -368,7 +368,7 @@ KERNEL_FQ void m26610_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh
 
   AES_GCM_decrypt (key, J0, ct, 32, pt, s_te0, s_te1, s_te2, s_te3, s_te4);
 
-  const int correct = is_valid_printable_32 (pt[0])
+  int correct = is_valid_printable_32 (pt[0])
                     + is_valid_printable_32 (pt[1])
                     + is_valid_printable_32 (pt[2])
                     + is_valid_printable_32 (pt[3])
@@ -379,6 +379,37 @@ KERNEL_FQ void m26610_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh
 
   if (correct != 8) return;
 
+  u32 ct2[8];
+
+  ct2[0] = pbkdf2_sha256_aes_gcm->ct_buf[8]; // third block of ciphertext
+  ct2[1] = pbkdf2_sha256_aes_gcm->ct_buf[9];
+  ct2[2] = pbkdf2_sha256_aes_gcm->ct_buf[10];
+  ct2[3] = pbkdf2_sha256_aes_gcm->ct_buf[11];
+  ct2[4] = pbkdf2_sha256_aes_gcm->ct_buf[12]; // fourth block of ciphertext
+  ct2[5] = pbkdf2_sha256_aes_gcm->ct_buf[13];
+  ct2[6] = pbkdf2_sha256_aes_gcm->ct_buf[14];
+  ct2[7] = pbkdf2_sha256_aes_gcm->ct_buf[15];
+
+  // Only a single increment as the previous AES_GCM_DECRYPT already does one for us
+  J0[3]++;
+
+  u32 pt2[8] = { 0 };
+
+  AES_GCM_decrypt (key, J0, ct2, 32, pt2, s_te0, s_te1, s_te2, s_te3, s_te4);
+
+  correct = is_valid_printable_32 (pt2[0])
+                    + is_valid_printable_32 (pt2[1])
+                    + is_valid_printable_32 (pt2[2])
+                    + is_valid_printable_32 (pt2[3])
+                    + is_valid_printable_32 (pt2[4])
+                    + is_valid_printable_32 (pt2[5])
+                    + is_valid_printable_32 (pt2[6])
+                    + is_valid_printable_32 (pt2[7]);
+
+  // We need to check a second and third block to avoid extremely rare false-positives. See:
+  // https://github.com/hashcat/hashcat/issues/4121
+  if (correct != 8) return;
+
   /*
   const int pt_len = 28; // not using 32 byte but 28 because our UTF8 allows up to 4 byte per character and since we decrypt 32 byte
                          // only we can't guarantee it is not in the middle of a UTF8 byte stream at that point

From dcfa17100ea13b27f5bd18c8ed2bf1b99ce276e4 Mon Sep 17 00:00:00 2001
From: PenguinKeeper7 <jones-adam@live.co.uk>
Date: Tue, 26 Nov 2024 00:09:53 +0000
Subject: [PATCH 11/83] Minor typo fix

---
 OpenCL/m26610-pure.cl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/OpenCL/m26610-pure.cl b/OpenCL/m26610-pure.cl
index cb02840f6..9815eb8dd 100644
--- a/OpenCL/m26610-pure.cl
+++ b/OpenCL/m26610-pure.cl
@@ -406,7 +406,7 @@ KERNEL_FQ void m26610_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh
                     + is_valid_printable_32 (pt2[6])
                     + is_valid_printable_32 (pt2[7]);
 
-  // We need to check a second and third block to avoid extremely rare false-positives. See:
+  // We need to check a third and fourth block to avoid extremely rare false-positives. See:
   // https://github.com/hashcat/hashcat/issues/4121
   if (correct != 8) return;
 

From 29259ff97315ef0899cdf52bc8ec3c16b124f67b Mon Sep 17 00:00:00 2001
From: PenguinKeeper7 <jones-adam@live.co.uk>
Date: Tue, 7 Jan 2025 02:43:31 +0000
Subject: [PATCH 12/83] Improve salt length reporting in hashconfig

Increase the number of hash modes that will have their salt min/max length shown by including SALT_TYPE_GENERIC
---
 src/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main.c b/src/main.c
index 085ce38b4..9158698a9 100644
--- a/src/main.c
+++ b/src/main.c
@@ -996,7 +996,7 @@ static void main_hashconfig_post (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MAYBE
 
   if (hashconfig->is_salted == true)
   {
-    if (hashconfig->opti_type & OPTI_TYPE_RAW_HASH)
+    if (hashconfig->opti_type & OPTI_TYPE_RAW_HASH || hashconfig->salt_type & SALT_TYPE_GENERIC)
     {
       event_log_info (hashcat_ctx, "Minimum salt length supported by kernel: %u", hashconfig->salt_min);
       event_log_info (hashcat_ctx, "Maximum salt length supported by kernel: %u", hashconfig->salt_max);

From ab77b8f5bafa86460d1a0eba2f28831ac19cd833 Mon Sep 17 00:00:00 2001
From: sc00bz <Sc00bz@users.noreply.github.com>
Date: Mon, 3 Feb 2025 10:35:05 -0600
Subject: [PATCH 13/83] Added support for multiple TOTP codes

---
 OpenCL/m18100_a0-pure.cl   | 277 +++++++++++++------
 OpenCL/m18100_a1-pure.cl   | 441 +++++++++++++++++++++++--------
 OpenCL/m18100_a3-pure.cl   | 529 +++++++++++++++++++++++--------------
 src/modules/module_18100.c | 152 +++++++----
 4 files changed, 953 insertions(+), 446 deletions(-)

diff --git a/OpenCL/m18100_a0-pure.cl b/OpenCL/m18100_a0-pure.cl
index 9c8f047a9..396d896bc 100644
--- a/OpenCL/m18100_a0-pure.cl
+++ b/OpenCL/m18100_a0-pure.cl
@@ -16,6 +16,47 @@
 #include M2S(INCLUDE_PATH/inc_hash_sha1.cl)
 #endif
 
+DECLSPEC void _totp_calculate (PRIVATE_AS u32 *code, PRIVATE_AS const u32 *w, const u32 pw_len, PRIVATE_AS const u32 *s, const u32 salt_len)
+{
+  sha1_hmac_ctx_t ctx;
+  // HMAC-SHA1 via the sha1_hmac_* helpers: w/pw_len go to init (presumably the byte-swapping variant), s/salt_len to update
+  sha1_hmac_init_swap (&ctx, w, pw_len);
+
+  sha1_hmac_update (&ctx, s, salt_len);
+
+  sha1_hmac_final (&ctx);
+
+  // 32-bit value extracted from the result words ctx.opad.h[0..4]; the 0 is always overwritten by the switch below
+  u32 otp_code = 0;
+
+  // low 4 bits of h[4] give a byte offset 0..15; each case assembles the 4 bytes starting there (h[] read as big-endian words)
+  switch (ctx.opad.h[4] & 15)
+  {
+    case  0: otp_code = ctx.opad.h[0];                              break;
+    case  1: otp_code = ctx.opad.h[0] <<  8 | ctx.opad.h[1] >> 24;  break;
+    case  2: otp_code = ctx.opad.h[0] << 16 | ctx.opad.h[1] >> 16;  break;
+    case  3: otp_code = ctx.opad.h[0] << 24 | ctx.opad.h[1] >>  8;  break;
+    case  4: otp_code = ctx.opad.h[1];                              break;
+    case  5: otp_code = ctx.opad.h[1] <<  8 | ctx.opad.h[2] >> 24;  break;
+    case  6: otp_code = ctx.opad.h[1] << 16 | ctx.opad.h[2] >> 16;  break;
+    case  7: otp_code = ctx.opad.h[1] << 24 | ctx.opad.h[2] >>  8;  break;
+    case  8: otp_code = ctx.opad.h[2];                              break;
+    case  9: otp_code = ctx.opad.h[2] <<  8 | ctx.opad.h[3] >> 24;  break;
+    case 10: otp_code = ctx.opad.h[2] << 16 | ctx.opad.h[3] >> 16;  break;
+    case 11: otp_code = ctx.opad.h[2] << 24 | ctx.opad.h[3] >>  8;  break;
+    case 12: otp_code = ctx.opad.h[3];                              break;
+    case 13: otp_code = ctx.opad.h[3] <<  8 | ctx.opad.h[4] >> 24;  break;
+    case 14: otp_code = ctx.opad.h[3] << 16 | ctx.opad.h[4] >> 16;  break;
+    case 15: otp_code = ctx.opad.h[3] << 24 | ctx.opad.h[4] >>  8;  break;
+  }
+
+  // clear the top bit so only the lower 31 bits remain
+  otp_code &= 0x7fffffff;
+
+  // reduce to a 6-digit decimal code and return it through *code
+  *code = otp_code % 1000000;
+}
+
 KERNEL_FQ void m18100_mxx (KERN_ATTR_RULES ())
 {
   /**
@@ -33,63 +74,85 @@ KERNEL_FQ void m18100_mxx (KERN_ATTR_RULES ())
 
   COPY_PW (pws[gid]);
 
-  const u32 salt_len = 8;
+  const u32 count = salt_bufs[SALT_POS_HOST].salt_len / 16;
 
   u32 s[64] = { 0 };
 
-  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
+  for (u32 i = 0; i < count; i += 1)
   {
-    s[idx] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[idx]);
+    s[16 * i + 0] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[4 * i + 0]);
+    s[16 * i + 1] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[4 * i + 1]);
   }
 
   /**
    * loop
    */
 
-  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  if (count == 1)
   {
-    pw_t tmp = PASTE_PW;
-
-    tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
-
-    sha1_hmac_ctx_t ctx;
-
-    sha1_hmac_init_swap (&ctx, tmp.i, tmp.pw_len);
-
-    sha1_hmac_update (&ctx, s, salt_len);
-
-    sha1_hmac_final (&ctx);
-
-    // initialize a buffer for the otp code
-    u32 otp_code = 0;
-
-    // grab 4 consecutive bytes of the hash, starting at offset
-    switch (ctx.opad.h[4] & 15)
+    for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
     {
-      case  0: otp_code = ctx.opad.h[0];                              break;
-      case  1: otp_code = ctx.opad.h[0] <<  8 | ctx.opad.h[1] >> 24;  break;
-      case  2: otp_code = ctx.opad.h[0] << 16 | ctx.opad.h[1] >> 16;  break;
-      case  3: otp_code = ctx.opad.h[0] << 24 | ctx.opad.h[1] >>  8;  break;
-      case  4: otp_code = ctx.opad.h[1];                              break;
-      case  5: otp_code = ctx.opad.h[1] <<  8 | ctx.opad.h[2] >> 24;  break;
-      case  6: otp_code = ctx.opad.h[1] << 16 | ctx.opad.h[2] >> 16;  break;
-      case  7: otp_code = ctx.opad.h[1] << 24 | ctx.opad.h[2] >>  8;  break;
-      case  8: otp_code = ctx.opad.h[2];                              break;
-      case  9: otp_code = ctx.opad.h[2] <<  8 | ctx.opad.h[3] >> 24;  break;
-      case 10: otp_code = ctx.opad.h[2] << 16 | ctx.opad.h[3] >> 16;  break;
-      case 11: otp_code = ctx.opad.h[2] << 24 | ctx.opad.h[3] >>  8;  break;
-      case 12: otp_code = ctx.opad.h[3];                              break;
-      case 13: otp_code = ctx.opad.h[3] <<  8 | ctx.opad.h[4] >> 24;  break;
-      case 14: otp_code = ctx.opad.h[3] << 16 | ctx.opad.h[4] >> 16;  break;
-      case 15: otp_code = ctx.opad.h[3] << 24 | ctx.opad.h[4] >>  8;  break;
+      pw_t tmp = PASTE_PW;
+
+      tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
+
+      u32 otp_code0;
+
+      _totp_calculate (&otp_code0, tmp.i, tmp.pw_len, s, 8);
+
+      COMPARE_M_SCALAR (otp_code0, 0, 0, 0);
     }
+  }
+  else if (count == 2)
+  {
+    for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+    {
+      pw_t tmp = PASTE_PW;
 
-    // take only the lower 31 bits
-    otp_code &= 0x7fffffff;
-    // we want to generate only 6 digits of code
-    otp_code %= 1000000;
+      tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
 
-    COMPARE_M_SCALAR (otp_code, 0, 0, 0);
+      u32 otp_code0, otp_code1;
+
+      _totp_calculate (&otp_code0, tmp.i, tmp.pw_len, s +  0, 8);
+      _totp_calculate (&otp_code1, tmp.i, tmp.pw_len, s + 16, 8);
+
+      COMPARE_M_SCALAR (otp_code0, otp_code1, 0, 0);
+    }
+  }
+  else if (count == 3)
+  {
+    for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+    {
+      pw_t tmp = PASTE_PW;
+
+      tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
+
+      u32 otp_code0, otp_code1, otp_code2;
+
+      _totp_calculate (&otp_code0, tmp.i, tmp.pw_len, s +  0, 8);
+      _totp_calculate (&otp_code1, tmp.i, tmp.pw_len, s + 16, 8);
+      _totp_calculate (&otp_code2, tmp.i, tmp.pw_len, s + 32, 8);
+
+      COMPARE_M_SCALAR (otp_code0, otp_code1, otp_code2, 0);
+    }
+  }
+  else if (count == 4)
+  {
+    for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+    {
+      pw_t tmp = PASTE_PW;
+
+      tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
+
+      u32 otp_code0, otp_code1, otp_code2, otp_code3;
+
+      _totp_calculate (&otp_code0, tmp.i, tmp.pw_len, s +  0, 8);
+      _totp_calculate (&otp_code1, tmp.i, tmp.pw_len, s + 16, 8);
+      _totp_calculate (&otp_code2, tmp.i, tmp.pw_len, s + 32, 8);
+      _totp_calculate (&otp_code3, tmp.i, tmp.pw_len, s + 48, 8);
+
+      COMPARE_M_SCALAR (otp_code0, otp_code1, otp_code2, otp_code3);
+    }
   }
 }
 
@@ -122,62 +185,108 @@ KERNEL_FQ void m18100_sxx (KERN_ATTR_RULES ())
 
   COPY_PW (pws[gid]);
 
-  const u32 salt_len = 8;
+  const u32 count = salt_bufs[SALT_POS_HOST].salt_len / 16;
 
   u32 s[64] = { 0 };
 
-  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
+  for (u32 i = 0; i < count; i += 1)
   {
-    s[idx] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[idx]);
+    s[16 * i + 0] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[4 * i + 0]);
+    s[16 * i + 1] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[4 * i + 1]);
   }
 
   /**
    * loop
    */
 
-  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  if (count == 1)
   {
-    pw_t tmp = PASTE_PW;
-
-    tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
-
-    sha1_hmac_ctx_t ctx;
-
-    sha1_hmac_init_swap (&ctx, tmp.i, tmp.pw_len);
-
-    sha1_hmac_update (&ctx, s, salt_len);
-
-    sha1_hmac_final (&ctx);
-
-    // initialize a buffer for the otp code
-    u32 otp_code = 0;
-
-    // grab 4 consecutive bytes of the hash, starting at offset
-    switch (ctx.opad.h[4] & 15)
+    for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
     {
-      case  0: otp_code = ctx.opad.h[0];                              break;
-      case  1: otp_code = ctx.opad.h[0] <<  8 | ctx.opad.h[1] >> 24;  break;
-      case  2: otp_code = ctx.opad.h[0] << 16 | ctx.opad.h[1] >> 16;  break;
-      case  3: otp_code = ctx.opad.h[0] << 24 | ctx.opad.h[1] >>  8;  break;
-      case  4: otp_code = ctx.opad.h[1];                              break;
-      case  5: otp_code = ctx.opad.h[1] <<  8 | ctx.opad.h[2] >> 24;  break;
-      case  6: otp_code = ctx.opad.h[1] << 16 | ctx.opad.h[2] >> 16;  break;
-      case  7: otp_code = ctx.opad.h[1] << 24 | ctx.opad.h[2] >>  8;  break;
-      case  8: otp_code = ctx.opad.h[2];                              break;
-      case  9: otp_code = ctx.opad.h[2] <<  8 | ctx.opad.h[3] >> 24;  break;
-      case 10: otp_code = ctx.opad.h[2] << 16 | ctx.opad.h[3] >> 16;  break;
-      case 11: otp_code = ctx.opad.h[2] << 24 | ctx.opad.h[3] >>  8;  break;
-      case 12: otp_code = ctx.opad.h[3];                              break;
-      case 13: otp_code = ctx.opad.h[3] <<  8 | ctx.opad.h[4] >> 24;  break;
-      case 14: otp_code = ctx.opad.h[3] << 16 | ctx.opad.h[4] >> 16;  break;
-      case 15: otp_code = ctx.opad.h[3] << 24 | ctx.opad.h[4] >>  8;  break;
+      pw_t tmp = PASTE_PW;
+
+      tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
+
+      u32 otp_code0;
+
+      _totp_calculate (&otp_code0, tmp.i, tmp.pw_len, s, 8);
+
+      COMPARE_S_SCALAR (otp_code0, 0, 0, 0);
     }
+  }
+  else if (count == 2)
+  {
+    for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+    {
+      pw_t tmp = PASTE_PW;
 
-    // take only the lower 31 bits
-    otp_code &= 0x7fffffff;
-    // we want to generate only 6 digits of code
-    otp_code %= 1000000;
+      tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
 
-    COMPARE_S_SCALAR (otp_code, 0, 0, 0);
+      u32 otp_code0, otp_code1;
+
+      _totp_calculate (&otp_code0, tmp.i, tmp.pw_len, s, 8);
+
+      if (otp_code0 == search[0])
+      {
+        _totp_calculate (&otp_code1, tmp.i, tmp.pw_len, s + 16, 8);
+
+        COMPARE_S_SCALAR (otp_code0, otp_code1, 0, 0);
+      }
+    }
+  }
+  else if (count == 3)
+  {
+    for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+    {
+      pw_t tmp = PASTE_PW;
+
+      tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
+
+      u32 otp_code0, otp_code1, otp_code2;
+
+      _totp_calculate (&otp_code0, tmp.i, tmp.pw_len, s, 8);
+
+      if (otp_code0 == search[0])
+      {
+        _totp_calculate (&otp_code1, tmp.i, tmp.pw_len, s + 16, 8);
+
+        if (otp_code1 == search[1])
+        {
+          _totp_calculate (&otp_code2, tmp.i, tmp.pw_len, s + 32, 8);
+
+          COMPARE_S_SCALAR (otp_code0, otp_code1, otp_code2, 0);
+        }
+      }
+    }
+  }
+  else if (count == 4)
+  {
+    for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+    {
+      pw_t tmp = PASTE_PW;
+
+      tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
+
+      u32 otp_code0, otp_code1, otp_code2, otp_code3;
+
+      _totp_calculate (&otp_code0, tmp.i, tmp.pw_len, s, 8);
+
+      if (otp_code0 == search[0])
+      {
+        _totp_calculate (&otp_code1, tmp.i, tmp.pw_len, s + 16, 8);
+
+        if (otp_code1 == search[1])
+        {
+          _totp_calculate (&otp_code2, tmp.i, tmp.pw_len, s + 32, 8);
+
+          if (otp_code2 == search[2])
+          {
+            _totp_calculate (&otp_code3, tmp.i, tmp.pw_len, s + 48, 8);
+
+            COMPARE_S_SCALAR (otp_code0, otp_code1, otp_code2, otp_code3);
+          }
+        }
+      }
+    }
   }
 }
diff --git a/OpenCL/m18100_a1-pure.cl b/OpenCL/m18100_a1-pure.cl
index a596894ca..4eefd1fdc 100644
--- a/OpenCL/m18100_a1-pure.cl
+++ b/OpenCL/m18100_a1-pure.cl
@@ -14,6 +14,47 @@
 #include M2S(INCLUDE_PATH/inc_hash_sha1.cl)
 #endif
 
+DECLSPEC void _totp_calculate (PRIVATE_AS u32 *code, PRIVATE_AS const u32 *w, const u32 pw_len, PRIVATE_AS const u32 *s, const u32 salt_len)
+{
+  sha1_hmac_ctx_t ctx;
+  // HMAC-SHA1 via the sha1_hmac_* helpers: w/pw_len go to the plain init (no swapping variant here), s/salt_len to update
+  sha1_hmac_init (&ctx, w, pw_len);
+
+  sha1_hmac_update (&ctx, s, salt_len);
+
+  sha1_hmac_final (&ctx);
+
+  // 32-bit value extracted from the result words ctx.opad.h[0..4]; the 0 is always overwritten by the switch below
+  u32 otp_code = 0;
+
+  // low 4 bits of h[4] give a byte offset 0..15; each case assembles the 4 bytes starting there (h[] read as big-endian words)
+  switch (ctx.opad.h[4] & 15)
+  {
+    case  0: otp_code = ctx.opad.h[0];                              break;
+    case  1: otp_code = ctx.opad.h[0] <<  8 | ctx.opad.h[1] >> 24;  break;
+    case  2: otp_code = ctx.opad.h[0] << 16 | ctx.opad.h[1] >> 16;  break;
+    case  3: otp_code = ctx.opad.h[0] << 24 | ctx.opad.h[1] >>  8;  break;
+    case  4: otp_code = ctx.opad.h[1];                              break;
+    case  5: otp_code = ctx.opad.h[1] <<  8 | ctx.opad.h[2] >> 24;  break;
+    case  6: otp_code = ctx.opad.h[1] << 16 | ctx.opad.h[2] >> 16;  break;
+    case  7: otp_code = ctx.opad.h[1] << 24 | ctx.opad.h[2] >>  8;  break;
+    case  8: otp_code = ctx.opad.h[2];                              break;
+    case  9: otp_code = ctx.opad.h[2] <<  8 | ctx.opad.h[3] >> 24;  break;
+    case 10: otp_code = ctx.opad.h[2] << 16 | ctx.opad.h[3] >> 16;  break;
+    case 11: otp_code = ctx.opad.h[2] << 24 | ctx.opad.h[3] >>  8;  break;
+    case 12: otp_code = ctx.opad.h[3];                              break;
+    case 13: otp_code = ctx.opad.h[3] <<  8 | ctx.opad.h[4] >> 24;  break;
+    case 14: otp_code = ctx.opad.h[3] << 16 | ctx.opad.h[4] >> 16;  break;
+    case 15: otp_code = ctx.opad.h[3] << 24 | ctx.opad.h[4] >>  8;  break;
+  }
+
+  // clear the top bit so only the lower 31 bits remain
+  otp_code &= 0x7fffffff;
+
+  // reduce to a 6-digit decimal code and return it through *code
+  *code = otp_code % 1000000;
+}
+
 KERNEL_FQ void m18100_mxx (KERN_ATTR_BASIC ())
 {
   /**
@@ -38,81 +79,157 @@ KERNEL_FQ void m18100_mxx (KERN_ATTR_BASIC ())
     w[idx] = hc_swap32_S (pws[gid].i[idx]);
   }
 
-  const u32 salt_len = 8;
+  const u32 count = salt_bufs[SALT_POS_HOST].salt_len / 16;
 
   u32 s[64] = { 0 };
 
-  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
+  for (u32 i = 0; i < count; i += 1)
   {
-    s[idx] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[idx]);
+    s[16 * i + 0] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[4 * i + 0]);
+    s[16 * i + 1] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[4 * i + 1]);
   }
 
   /**
    * loop
    */
 
-  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  if (count == 1)
   {
-    const u32 comb_len = combs_buf[il_pos].pw_len;
-
-    u32 c[64];
-
-    #ifdef _unroll
-    #pragma unroll
-    #endif
-    for (int idx = 0; idx < 64; idx++)
+    for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
     {
-      c[idx] = hc_swap32_S (combs_buf[il_pos].i[idx]);
+      const u32 comb_len = combs_buf[il_pos].pw_len;
+
+      u32 c[64];
+
+      #ifdef _unroll
+      #pragma unroll
+      #endif
+      for (int idx = 0; idx < 64; idx++)
+      {
+        c[idx] = hc_swap32_S (combs_buf[il_pos].i[idx]);
+      }
+
+      switch_buffer_by_offset_1x64_be_S (c, pw_len);
+
+      #ifdef _unroll
+      #pragma unroll
+      #endif
+      for (int i = 0; i < 64; i++)
+      {
+        c[i] |= w[i];
+      }
+
+      u32 otp_code0;
+
+      _totp_calculate (&otp_code0, c, pw_len + comb_len, s, 8);
+
+      COMPARE_M_SCALAR (otp_code0, 0, 0, 0);
     }
-
-    switch_buffer_by_offset_1x64_be_S (c, pw_len);
-
-    #ifdef _unroll
-    #pragma unroll
-    #endif
-    for (int i = 0; i < 64; i++)
+  }
+  else if (count == 2)
+  {
+    for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
     {
-      c[i] |= w[i];
+      const u32 comb_len = combs_buf[il_pos].pw_len;
+
+      u32 c[64];
+
+      #ifdef _unroll
+      #pragma unroll
+      #endif
+      for (int idx = 0; idx < 64; idx++)
+      {
+        c[idx] = hc_swap32_S (combs_buf[il_pos].i[idx]);
+      }
+
+      switch_buffer_by_offset_1x64_be_S (c, pw_len);
+
+      #ifdef _unroll
+      #pragma unroll
+      #endif
+      for (int i = 0; i < 64; i++)
+      {
+        c[i] |= w[i];
+      }
+
+      u32 otp_code0, otp_code1;
+
+      _totp_calculate (&otp_code0, c, pw_len + comb_len, s +  0, 8);
+      _totp_calculate (&otp_code1, c, pw_len + comb_len, s + 16, 8);
+
+      COMPARE_M_SCALAR (otp_code0, otp_code1, 0, 0);
     }
-
-    sha1_hmac_ctx_t ctx;
-
-    sha1_hmac_init (&ctx, c, pw_len + comb_len);
-
-    sha1_hmac_update (&ctx, s, salt_len);
-
-    sha1_hmac_final (&ctx);
-
-    // initialize a buffer for the otp code
-    u32 otp_code = 0;
-
-    // grab 4 consecutive bytes of the hash, starting at offset
-    switch (ctx.opad.h[4] & 15)
+  }
+  else if (count == 3)
+  {
+    for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
     {
-      case  0: otp_code = ctx.opad.h[0];                              break;
-      case  1: otp_code = ctx.opad.h[0] <<  8 | ctx.opad.h[1] >> 24;  break;
-      case  2: otp_code = ctx.opad.h[0] << 16 | ctx.opad.h[1] >> 16;  break;
-      case  3: otp_code = ctx.opad.h[0] << 24 | ctx.opad.h[1] >>  8;  break;
-      case  4: otp_code = ctx.opad.h[1];                              break;
-      case  5: otp_code = ctx.opad.h[1] <<  8 | ctx.opad.h[2] >> 24;  break;
-      case  6: otp_code = ctx.opad.h[1] << 16 | ctx.opad.h[2] >> 16;  break;
-      case  7: otp_code = ctx.opad.h[1] << 24 | ctx.opad.h[2] >>  8;  break;
-      case  8: otp_code = ctx.opad.h[2];                              break;
-      case  9: otp_code = ctx.opad.h[2] <<  8 | ctx.opad.h[3] >> 24;  break;
-      case 10: otp_code = ctx.opad.h[2] << 16 | ctx.opad.h[3] >> 16;  break;
-      case 11: otp_code = ctx.opad.h[2] << 24 | ctx.opad.h[3] >>  8;  break;
-      case 12: otp_code = ctx.opad.h[3];                              break;
-      case 13: otp_code = ctx.opad.h[3] <<  8 | ctx.opad.h[4] >> 24;  break;
-      case 14: otp_code = ctx.opad.h[3] << 16 | ctx.opad.h[4] >> 16;  break;
-      case 15: otp_code = ctx.opad.h[3] << 24 | ctx.opad.h[4] >>  8;  break;
+      const u32 comb_len = combs_buf[il_pos].pw_len;
+
+      u32 c[64];
+
+      #ifdef _unroll
+      #pragma unroll
+      #endif
+      for (int idx = 0; idx < 64; idx++)
+      {
+        c[idx] = hc_swap32_S (combs_buf[il_pos].i[idx]);
+      }
+
+      switch_buffer_by_offset_1x64_be_S (c, pw_len);
+
+      #ifdef _unroll
+      #pragma unroll
+      #endif
+      for (int i = 0; i < 64; i++)
+      {
+        c[i] |= w[i];
+      }
+
+      u32 otp_code0, otp_code1, otp_code2;
+
+      _totp_calculate (&otp_code0, c, pw_len + comb_len, s +  0, 8);
+      _totp_calculate (&otp_code1, c, pw_len + comb_len, s + 16, 8);
+      _totp_calculate (&otp_code2, c, pw_len + comb_len, s + 32, 8);
+
+      COMPARE_M_SCALAR (otp_code0, otp_code1, otp_code2, 0);
     }
+  }
+  else if (count == 4)
+  {
+    for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+    {
+      const u32 comb_len = combs_buf[il_pos].pw_len;
 
-    // take only the lower 31 bits
-    otp_code &= 0x7fffffff;
-    // we want to generate only 6 digits of code
-    otp_code %= 1000000;
+      u32 c[64];
 
-    COMPARE_M_SCALAR (otp_code, 0, 0, 0);
+      #ifdef _unroll
+      #pragma unroll
+      #endif
+      for (int idx = 0; idx < 64; idx++)
+      {
+        c[idx] = hc_swap32_S (combs_buf[il_pos].i[idx]);
+      }
+
+      switch_buffer_by_offset_1x64_be_S (c, pw_len);
+
+      #ifdef _unroll
+      #pragma unroll
+      #endif
+      for (int i = 0; i < 64; i++)
+      {
+        c[i] |= w[i];
+      }
+
+      u32 otp_code0, otp_code1, otp_code2, otp_code3;
+
+      _totp_calculate (&otp_code0, c, pw_len + comb_len, s +  0, 8);
+      _totp_calculate (&otp_code1, c, pw_len + comb_len, s + 16, 8);
+      _totp_calculate (&otp_code2, c, pw_len + comb_len, s + 32, 8);
+      _totp_calculate (&otp_code3, c, pw_len + comb_len, s + 48, 8);
+
+      COMPARE_M_SCALAR (otp_code0, otp_code1, otp_code2, otp_code3);
+    }
   }
 }
 
@@ -152,80 +269,180 @@ KERNEL_FQ void m18100_sxx (KERN_ATTR_BASIC ())
     w[idx] = hc_swap32_S (pws[gid].i[idx]);
   }
 
-  const u32 salt_len = 8;
+  const u32 count = salt_bufs[SALT_POS_HOST].salt_len / 16;
 
   u32 s[64] = { 0 };
 
-  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
+  for (u32 i = 0; i < count; i += 1)
   {
-    s[idx] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[idx]);
+    s[16 * i + 0] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[4 * i + 0]);
+    s[16 * i + 1] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[4 * i + 1]);
   }
 
   /**
    * loop
    */
 
-  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  if (count == 1)
   {
-    const u32 comb_len = combs_buf[il_pos].pw_len;
-
-    u32 c[64];
-
-    #ifdef _unroll
-    #pragma unroll
-    #endif
-    for (int idx = 0; idx < 64; idx++)
+    for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
     {
-      c[idx] = hc_swap32_S (combs_buf[il_pos].i[idx]);
+      const u32 comb_len = combs_buf[il_pos].pw_len;
+
+      u32 c[64];
+
+      #ifdef _unroll
+      #pragma unroll
+      #endif
+      for (int idx = 0; idx < 64; idx++)
+      {
+        c[idx] = hc_swap32_S (combs_buf[il_pos].i[idx]);
+      }
+
+      switch_buffer_by_offset_1x64_be_S (c, pw_len);
+
+      #ifdef _unroll
+      #pragma unroll
+      #endif
+      for (int i = 0; i < 64; i++)
+      {
+        c[i] |= w[i];
+      }
+
+      u32 otp_code0;
+
+      _totp_calculate (&otp_code0, c, pw_len + comb_len, s, 8);
+
+      COMPARE_S_SCALAR (otp_code0, 0, 0, 0);
     }
-
-    switch_buffer_by_offset_1x64_be_S (c, pw_len);
-
-    #ifdef _unroll
-    #pragma unroll
-    #endif
-    for (int i = 0; i < 64; i++)
+  }
+  else if (count == 2)
+  {
+    for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
     {
-      c[i] |= w[i];
+      const u32 comb_len = combs_buf[il_pos].pw_len;
+
+      u32 c[64];
+
+      #ifdef _unroll
+      #pragma unroll
+      #endif
+      for (int idx = 0; idx < 64; idx++)
+      {
+        c[idx] = hc_swap32_S (combs_buf[il_pos].i[idx]);
+      }
+
+      switch_buffer_by_offset_1x64_be_S (c, pw_len);
+
+      #ifdef _unroll
+      #pragma unroll
+      #endif
+      for (int i = 0; i < 64; i++)
+      {
+        c[i] |= w[i];
+      }
+
+      u32 otp_code0, otp_code1;
+
+      _totp_calculate (&otp_code0, c, pw_len + comb_len, s, 8);
+
+      if (otp_code0 == search[0])
+      {
+        _totp_calculate (&otp_code1, c, pw_len + comb_len, s + 16, 8);
+
+        COMPARE_S_SCALAR (otp_code0, otp_code1, 0, 0);
+      }
     }
-
-    sha1_hmac_ctx_t ctx;
-
-    sha1_hmac_init (&ctx, c, pw_len + comb_len);
-
-    sha1_hmac_update (&ctx, s, salt_len);
-
-    sha1_hmac_final (&ctx);
-
-    // initialize a buffer for the otp code
-    u32 otp_code = 0;
-
-    // grab 4 consecutive bytes of the hash, starting at offset
-    switch (ctx.opad.h[4] & 15)
+  }
+  else if (count == 3)
+  {
+    for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
     {
-      case  0: otp_code = ctx.opad.h[0];                              break;
-      case  1: otp_code = ctx.opad.h[0] <<  8 | ctx.opad.h[1] >> 24;  break;
-      case  2: otp_code = ctx.opad.h[0] << 16 | ctx.opad.h[1] >> 16;  break;
-      case  3: otp_code = ctx.opad.h[0] << 24 | ctx.opad.h[1] >>  8;  break;
-      case  4: otp_code = ctx.opad.h[1];                              break;
-      case  5: otp_code = ctx.opad.h[1] <<  8 | ctx.opad.h[2] >> 24;  break;
-      case  6: otp_code = ctx.opad.h[1] << 16 | ctx.opad.h[2] >> 16;  break;
-      case  7: otp_code = ctx.opad.h[1] << 24 | ctx.opad.h[2] >>  8;  break;
-      case  8: otp_code = ctx.opad.h[2];                              break;
-      case  9: otp_code = ctx.opad.h[2] <<  8 | ctx.opad.h[3] >> 24;  break;
-      case 10: otp_code = ctx.opad.h[2] << 16 | ctx.opad.h[3] >> 16;  break;
-      case 11: otp_code = ctx.opad.h[2] << 24 | ctx.opad.h[3] >>  8;  break;
-      case 12: otp_code = ctx.opad.h[3];                              break;
-      case 13: otp_code = ctx.opad.h[3] <<  8 | ctx.opad.h[4] >> 24;  break;
-      case 14: otp_code = ctx.opad.h[3] << 16 | ctx.opad.h[4] >> 16;  break;
-      case 15: otp_code = ctx.opad.h[3] << 24 | ctx.opad.h[4] >>  8;  break;
+      const u32 comb_len = combs_buf[il_pos].pw_len;
+
+      u32 c[64];
+
+      #ifdef _unroll
+      #pragma unroll
+      #endif
+      for (int idx = 0; idx < 64; idx++)
+      {
+        c[idx] = hc_swap32_S (combs_buf[il_pos].i[idx]);
+      }
+
+      switch_buffer_by_offset_1x64_be_S (c, pw_len);
+
+      #ifdef _unroll
+      #pragma unroll
+      #endif
+      for (int i = 0; i < 64; i++)
+      {
+        c[i] |= w[i];
+      }
+
+      u32 otp_code0, otp_code1, otp_code2;
+
+      _totp_calculate (&otp_code0, c, pw_len + comb_len, s, 8);
+
+      if (otp_code0 == search[0])
+      {
+        _totp_calculate (&otp_code1, c, pw_len + comb_len, s + 16, 8);
+
+        if (otp_code1 == search[1])
+        {
+          _totp_calculate (&otp_code2, c, pw_len + comb_len, s + 32, 8);
+
+          COMPARE_S_SCALAR (otp_code0, otp_code1, otp_code2, 0);
+        }
+      }
     }
+  }
+  else if (count == 4)
+  {
+    for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+    {
+      const u32 comb_len = combs_buf[il_pos].pw_len;
 
-    // take only the lower 31 bits
-    otp_code &= 0x7fffffff;
-    // we want to generate only 6 digits of code
-    otp_code %= 1000000;
+      u32 c[64];
 
-    COMPARE_S_SCALAR (otp_code, 0, 0, 0);
+      #ifdef _unroll
+      #pragma unroll
+      #endif
+      for (int idx = 0; idx < 64; idx++)
+      {
+        c[idx] = hc_swap32_S (combs_buf[il_pos].i[idx]);
+      }
+
+      switch_buffer_by_offset_1x64_be_S (c, pw_len);
+
+      #ifdef _unroll
+      #pragma unroll
+      #endif
+      for (int i = 0; i < 64; i++)
+      {
+        c[i] |= w[i];
+      }
+
+      u32 otp_code0, otp_code1, otp_code2, otp_code3;
+
+      _totp_calculate (&otp_code0, c, pw_len + comb_len, s, 8);
+
+      if (otp_code0 == search[0])
+      {
+        _totp_calculate (&otp_code1, c, pw_len + comb_len, s + 16, 8);
+
+        if (otp_code1 == search[1])
+        {
+          _totp_calculate (&otp_code2, c, pw_len + comb_len, s + 32, 8);
+
+          if (otp_code2 == search[2])
+          {
+            _totp_calculate (&otp_code3, c, pw_len + comb_len, s + 48, 8);
+
+            COMPARE_S_SCALAR (otp_code0, otp_code1, otp_code2, otp_code3);
+          }
+        }
+      }
+    }
   }
 }
diff --git a/OpenCL/m18100_a3-pure.cl b/OpenCL/m18100_a3-pure.cl
index 70235ce95..2dd68ffdf 100644
--- a/OpenCL/m18100_a3-pure.cl
+++ b/OpenCL/m18100_a3-pure.cl
@@ -1,205 +1,324 @@
-/**
- * Author......: See docs/credits.txt
- * License.....: MIT
- */
-
-//#define NEW_SIMD_CODE
-
-#ifdef KERNEL_STATIC
-#include M2S(INCLUDE_PATH/inc_vendor.h)
-#include M2S(INCLUDE_PATH/inc_types.h)
-#include M2S(INCLUDE_PATH/inc_platform.cl)
-#include M2S(INCLUDE_PATH/inc_common.cl)
-#include M2S(INCLUDE_PATH/inc_simd.cl)
-#include M2S(INCLUDE_PATH/inc_hash_sha1.cl)
-#endif
-
-KERNEL_FQ void m18100_mxx (KERN_ATTR_VECTOR ())
-{
-  /**
-   * modifier
-   */
-
-  const u64 lid = get_local_id (0);
-  const u64 gid = get_global_id (0);
-
-  if (gid >= GID_CNT) return;
-
-  /**
-   * base
-   */
-
-  const u32 pw_len = pws[gid].pw_len;
-
-  u32x w[64] = { 0 };
-
-  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
-  {
-    w[idx] = pws[gid].i[idx];
-  }
-
-  const u32 salt_len = 8;
-
-  u32x s[64] = { 0 };
-
-  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
-  {
-    s[idx] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[idx]);
-  }
-
-  /**
-   * loop
-   */
-
-  u32x w0l = w[0];
-
-  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
-  {
-    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
-
-    const u32x w0 = w0l | w0r;
-
-    w[0] = w0;
-
-    sha1_hmac_ctx_vector_t ctx;
-
-    sha1_hmac_init_vector (&ctx, w, pw_len);
-
-    sha1_hmac_update_vector (&ctx, s, salt_len);
-
-    sha1_hmac_final_vector (&ctx);
-
-    // initialize a buffer for the otp code
-    u32 otp_code = 0;
-
-    // grab 4 consecutive bytes of the hash, starting at offset
-    switch (ctx.opad.h[4] & 15)
-    {
-      case  0: otp_code = ctx.opad.h[0];                              break;
-      case  1: otp_code = ctx.opad.h[0] <<  8 | ctx.opad.h[1] >> 24;  break;
-      case  2: otp_code = ctx.opad.h[0] << 16 | ctx.opad.h[1] >> 16;  break;
-      case  3: otp_code = ctx.opad.h[0] << 24 | ctx.opad.h[1] >>  8;  break;
-      case  4: otp_code = ctx.opad.h[1];                              break;
-      case  5: otp_code = ctx.opad.h[1] <<  8 | ctx.opad.h[2] >> 24;  break;
-      case  6: otp_code = ctx.opad.h[1] << 16 | ctx.opad.h[2] >> 16;  break;
-      case  7: otp_code = ctx.opad.h[1] << 24 | ctx.opad.h[2] >>  8;  break;
-      case  8: otp_code = ctx.opad.h[2];                              break;
-      case  9: otp_code = ctx.opad.h[2] <<  8 | ctx.opad.h[3] >> 24;  break;
-      case 10: otp_code = ctx.opad.h[2] << 16 | ctx.opad.h[3] >> 16;  break;
-      case 11: otp_code = ctx.opad.h[2] << 24 | ctx.opad.h[3] >>  8;  break;
-      case 12: otp_code = ctx.opad.h[3];                              break;
-      case 13: otp_code = ctx.opad.h[3] <<  8 | ctx.opad.h[4] >> 24;  break;
-      case 14: otp_code = ctx.opad.h[3] << 16 | ctx.opad.h[4] >> 16;  break;
-      case 15: otp_code = ctx.opad.h[3] << 24 | ctx.opad.h[4] >>  8;  break;
-    }
-
-    // take only the lower 31 bits
-    otp_code &= 0x7fffffff;
-
-    // we want to generate only 6 digits of code
-    otp_code %= 1000000;
-
-    COMPARE_M_SIMD (otp_code, 0, 0, 0);
-  }
-}
-
-KERNEL_FQ void m18100_sxx (KERN_ATTR_VECTOR ())
-{
-  /**
-   * modifier
-   */
-
-  const u64 lid = get_local_id (0);
-  const u64 gid = get_global_id (0);
-
-  if (gid >= GID_CNT) return;
-
-  /**
-   * digest
-   */
-
-  const u32 search[4] =
-  {
-    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
-    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
-    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
-    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
-  };
-
-  /**
-   * base
-   */
-
-  const u32 pw_len = pws[gid].pw_len;
-
-  u32x w[64] = { 0 };
-
-  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
-  {
-    w[idx] = pws[gid].i[idx];
-  }
-
-  const u32 salt_len = 8;
-
-  u32x s[64] = { 0 };
-
-  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
-  {
-    s[idx] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[idx]);
-  }
-
-  /**
-   * loop
-   */
-
-  u32x w0l = w[0];
-
-  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
-  {
-    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
-
-    const u32x w0 = w0l | w0r;
-
-    w[0] = w0;
-
-    sha1_hmac_ctx_vector_t ctx;
-
-    sha1_hmac_init_vector (&ctx, w, pw_len);
-
-    sha1_hmac_update_vector (&ctx, s, salt_len);
-
-    sha1_hmac_final_vector (&ctx);
-
-    // initialize a buffer for the otp code
-    u32 otp_code = 0;
-
-    // grab 4 consecutive bytes of the hash, starting at offset
-    switch (ctx.opad.h[4] & 15)
-    {
-      case  0: otp_code = ctx.opad.h[0];                              break;
-      case  1: otp_code = ctx.opad.h[0] <<  8 | ctx.opad.h[1] >> 24;  break;
-      case  2: otp_code = ctx.opad.h[0] << 16 | ctx.opad.h[1] >> 16;  break;
-      case  3: otp_code = ctx.opad.h[0] << 24 | ctx.opad.h[1] >>  8;  break;
-      case  4: otp_code = ctx.opad.h[1];                              break;
-      case  5: otp_code = ctx.opad.h[1] <<  8 | ctx.opad.h[2] >> 24;  break;
-      case  6: otp_code = ctx.opad.h[1] << 16 | ctx.opad.h[2] >> 16;  break;
-      case  7: otp_code = ctx.opad.h[1] << 24 | ctx.opad.h[2] >>  8;  break;
-      case  8: otp_code = ctx.opad.h[2];                              break;
-      case  9: otp_code = ctx.opad.h[2] <<  8 | ctx.opad.h[3] >> 24;  break;
-      case 10: otp_code = ctx.opad.h[2] << 16 | ctx.opad.h[3] >> 16;  break;
-      case 11: otp_code = ctx.opad.h[2] << 24 | ctx.opad.h[3] >>  8;  break;
-      case 12: otp_code = ctx.opad.h[3];                              break;
-      case 13: otp_code = ctx.opad.h[3] <<  8 | ctx.opad.h[4] >> 24;  break;
-      case 14: otp_code = ctx.opad.h[3] << 16 | ctx.opad.h[4] >> 16;  break;
-      case 15: otp_code = ctx.opad.h[3] << 24 | ctx.opad.h[4] >>  8;  break;
-    }
-
-    // take only the lower 31 bits
-    otp_code &= 0x7fffffff;
-
-    // we want to generate only 6 digits of code
-    otp_code %= 1000000;
-
-    COMPARE_S_SIMD (otp_code, 0, 0, 0);
-  }
-}
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+//#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include M2S(INCLUDE_PATH/inc_vendor.h)
+#include M2S(INCLUDE_PATH/inc_types.h)
+#include M2S(INCLUDE_PATH/inc_platform.cl)
+#include M2S(INCLUDE_PATH/inc_common.cl)
+#include M2S(INCLUDE_PATH/inc_simd.cl)
+#include M2S(INCLUDE_PATH/inc_hash_sha1.cl)
+#endif
+
+DECLSPEC void _totp_calculate (PRIVATE_AS u32x *code, PRIVATE_AS const u32x *w, const u32 pw_len, PRIVATE_AS const u32x *s, const u32 salt_len)
+{
+  sha1_hmac_ctx_vector_t ctx;
+  // HMAC-SHA1 context set up from w/pw_len and fed s/salt_len; the reduced 6-digit value is written to *code
+  sha1_hmac_init_vector (&ctx, w, pw_len);
+
+  sha1_hmac_update_vector (&ctx, s, salt_len);
+
+  sha1_hmac_final_vector (&ctx);
+
+  // starts at 0; each of the 16 possible switch values below overwrites it
+  u32x otp_code = 0;
+
+  // the low 4 bits of ctx.opad.h[4] pick a byte offset 0..15; grab the 4 consecutive bytes of ctx.opad.h[] starting there
+  switch (ctx.opad.h[4] & 15)
+  {
+    case  0: otp_code = ctx.opad.h[0];                              break;
+    case  1: otp_code = ctx.opad.h[0] <<  8 | ctx.opad.h[1] >> 24;  break;
+    case  2: otp_code = ctx.opad.h[0] << 16 | ctx.opad.h[1] >> 16;  break;
+    case  3: otp_code = ctx.opad.h[0] << 24 | ctx.opad.h[1] >>  8;  break;
+    case  4: otp_code = ctx.opad.h[1];                              break;
+    case  5: otp_code = ctx.opad.h[1] <<  8 | ctx.opad.h[2] >> 24;  break;
+    case  6: otp_code = ctx.opad.h[1] << 16 | ctx.opad.h[2] >> 16;  break;
+    case  7: otp_code = ctx.opad.h[1] << 24 | ctx.opad.h[2] >>  8;  break;
+    case  8: otp_code = ctx.opad.h[2];                              break;
+    case  9: otp_code = ctx.opad.h[2] <<  8 | ctx.opad.h[3] >> 24;  break;
+    case 10: otp_code = ctx.opad.h[2] << 16 | ctx.opad.h[3] >> 16;  break;
+    case 11: otp_code = ctx.opad.h[2] << 24 | ctx.opad.h[3] >>  8;  break;
+    case 12: otp_code = ctx.opad.h[3];                              break;
+    case 13: otp_code = ctx.opad.h[3] <<  8 | ctx.opad.h[4] >> 24;  break;
+    case 14: otp_code = ctx.opad.h[3] << 16 | ctx.opad.h[4] >> 16;  break;
+    case 15: otp_code = ctx.opad.h[3] << 24 | ctx.opad.h[4] >>  8;  break;
+  }
+
+  // take only the lower 31 bits
+  otp_code &= 0x7fffffff;
+
+  // we want to generate only 6 digits of code
+  *code = otp_code % 1000000;
+}
+
+KERNEL_FQ void m18100_mxx (KERN_ATTR_VECTOR ())
+{
+  /**
+   * modifier: work-item ids; work-items with gid >= GID_CNT return immediately
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * base: copy the password words of pws[gid] into w[] and build one message slot per TOTP code in s[]
+   */
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  u32x w[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
+  {
+    w[idx] = pws[gid].i[idx];
+  }
+  // the host module sets salt_len = 16 * (number of TOTP codes), so count is the number of codes
+  const u32 count = salt_bufs[SALT_POS_HOST].salt_len / 16;
+  // code i owns the 16 words at s[16 * i]; only the first two are filled, the rest of the slot stays zero
+  u32x s[64] = { 0 };
+
+  for (u32 i = 0; i < count; i += 1)
+  {
+    s[16 * i + 0] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[4 * i + 0]);
+    s[16 * i + 1] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[4 * i + 1]);
+  }
+
+  /**
+   * loop: OR each words_buf_r entry into the saved w[0], compute one code per slot, compare; count outside 1..4 does nothing
+   */
+
+  u32x w0l = w[0];
+
+  if (count == 1)
+  {
+    for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+    {
+      const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+
+      const u32x w0 = w0l | w0r;
+
+      w[0] = w0;
+
+      u32x otp_code0;
+
+      _totp_calculate (&otp_code0, w, pw_len, s, 8);
+
+      COMPARE_M_SIMD (otp_code0, 0, 0, 0);
+    }
+  }
+  else if (count == 2)
+  {
+    for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+    {
+      const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+
+      const u32x w0 = w0l | w0r;
+
+      w[0] = w0;
+
+      u32x otp_code0, otp_code1;
+
+      _totp_calculate (&otp_code0, w, pw_len, s +  0, 8);
+      _totp_calculate (&otp_code1, w, pw_len, s + 16, 8);
+
+      COMPARE_M_SIMD (otp_code0, otp_code1, 0, 0);
+    }
+  }
+  else if (count == 3)
+  {
+    for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+    {
+      const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+
+      const u32x w0 = w0l | w0r;
+
+      w[0] = w0;
+
+      u32x otp_code0, otp_code1, otp_code2;
+
+      _totp_calculate (&otp_code0, w, pw_len, s +  0, 8);
+      _totp_calculate (&otp_code1, w, pw_len, s + 16, 8);
+      _totp_calculate (&otp_code2, w, pw_len, s + 32, 8);
+
+      COMPARE_M_SIMD (otp_code0, otp_code1, otp_code2, 0);
+    }
+  }
+  else if (count == 4)
+  {
+    for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+    {
+      const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+
+      const u32x w0 = w0l | w0r;
+
+      w[0] = w0;
+
+      u32x otp_code0, otp_code1, otp_code2, otp_code3;
+
+      _totp_calculate (&otp_code0, w, pw_len, s +  0, 8);
+      _totp_calculate (&otp_code1, w, pw_len, s + 16, 8);
+      _totp_calculate (&otp_code2, w, pw_len, s + 32, 8);
+      _totp_calculate (&otp_code3, w, pw_len, s + 48, 8);
+
+      COMPARE_M_SIMD (otp_code0, otp_code1, otp_code2, otp_code3);
+    }
+  }
+}
+
+KERNEL_FQ void m18100_sxx (KERN_ATTR_VECTOR ())
+{
+  /**
+   * modifier: work-item ids; work-items with gid >= GID_CNT return immediately
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * digest: the four digest words at DIGESTS_OFFSET_HOST; passed to MATCHES_ONE_VS below (COMPARE_S_SIMD presumably reads search[] too)
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
+  };
+
+  /**
+   * base: copy the password words of pws[gid] into w[] and build one message slot per TOTP code in s[]
+   */
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  u32x w[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
+  {
+    w[idx] = pws[gid].i[idx];
+  }
+  // the host module sets salt_len = 16 * (number of TOTP codes), so count is the number of codes
+  const u32 count = salt_bufs[SALT_POS_HOST].salt_len / 16;
+  // code i owns the 16 words at s[16 * i]; only the first two are filled, the rest of the slot stays zero
+  u32x s[64] = { 0 };
+
+  for (u32 i = 0; i < count; i += 1)
+  {
+    s[16 * i + 0] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[4 * i + 0]);
+    s[16 * i + 1] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[4 * i + 1]);
+  }
+
+  /**
+   * loop: OR each words_buf_r entry into the saved w[0]; later codes are computed only after the earlier ones matched
+   */
+
+  u32x w0l = w[0];
+
+  if (count == 1)
+  {
+    for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+    {
+      const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+
+      const u32x w0 = w0l | w0r;
+
+      w[0] = w0;
+
+      u32x otp_code0;
+
+      _totp_calculate (&otp_code0, w, pw_len, s, 8);
+
+      COMPARE_S_SIMD (otp_code0, 0, 0, 0);
+    }
+  }
+  else if (count == 2)
+  {
+    for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+    {
+      const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+
+      const u32x w0 = w0l | w0r;
+
+      w[0] = w0;
+
+      u32x otp_code0, otp_code1;
+
+      _totp_calculate (&otp_code0, w, pw_len, s, 8);
+      // skip the second HMAC unless the first code matches (MATCHES_ONE_VS presumably tests the vector against one digest word)
+      if (MATCHES_ONE_VS(otp_code0, search[0]))
+      {
+        _totp_calculate (&otp_code1, w, pw_len, s + 16, 8);
+
+        COMPARE_S_SIMD (otp_code0, otp_code1, 0, 0);
+      }
+    }
+  }
+  else if (count == 3)
+  {
+    for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+    {
+      const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+
+      const u32x w0 = w0l | w0r;
+
+      w[0] = w0;
+
+      u32x otp_code0, otp_code1, otp_code2;
+
+      _totp_calculate (&otp_code0, w, pw_len, s, 8);
+
+      if (MATCHES_ONE_VS(otp_code0, search[0]))
+      {
+        _totp_calculate (&otp_code1, w, pw_len, s + 16, 8);
+
+        if (MATCHES_ONE_VS(otp_code1, search[1]))
+        {
+          _totp_calculate (&otp_code2, w, pw_len, s + 32, 8);
+
+          COMPARE_S_SIMD (otp_code0, otp_code1, otp_code2, 0);
+        }
+      }
+    }
+  }
+  else if (count == 4)
+  {
+    for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+    {
+      const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+
+      const u32x w0 = w0l | w0r;
+
+      w[0] = w0;
+
+      u32x otp_code0, otp_code1, otp_code2, otp_code3;
+
+      _totp_calculate (&otp_code0, w, pw_len, s, 8);
+
+      if (MATCHES_ONE_VS(otp_code0, search[0]))
+      {
+        _totp_calculate (&otp_code1, w, pw_len, s + 16, 8);
+
+        if (MATCHES_ONE_VS(otp_code1, search[1]))
+        {
+          _totp_calculate (&otp_code2, w, pw_len, s + 32, 8);
+
+          if (MATCHES_ONE_VS(otp_code2, search[2]))
+          {
+            _totp_calculate (&otp_code3, w, pw_len, s + 48, 8);
+
+            COMPARE_S_SIMD (otp_code0, otp_code1, otp_code2, otp_code3);
+          }
+        }
+      }
+    }
+  }
+}
diff --git a/src/modules/module_18100.c b/src/modules/module_18100.c
index 37a8ef2e7..54b12a68e 100644
--- a/src/modules/module_18100.c
+++ b/src/modules/module_18100.c
@@ -29,7 +29,7 @@ static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_SUGGEST_KG;
 static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
 static const char *ST_PASS        = "hashcat";
-static const char *ST_HASH        = "597056:3600";
+static const char *ST_HASH        = "597056:3600:613004:1234567890:322664:9876543210";
 
 u32         module_attack_exec    (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC;     }
 u32         module_dgst_pos0      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0;       }
@@ -57,54 +57,92 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
 
   hc_token_t token;
 
-  memset (&token, 0, sizeof (hc_token_t));
-
-  token.token_cnt  = 2;
-
-  token.sep[0]     = hashconfig->separator;
-  token.len[0]     = 6;
-  token.attr[0]    = TOKEN_ATTR_FIXED_LENGTH
-                   | TOKEN_ATTR_VERIFY_HEX;
-
-  token.len_min[1] = SALT_MIN;
-  token.len_max[1] = SALT_MAX;
-  token.attr[1]    = TOKEN_ATTR_VERIFY_LENGTH;
-
-  if (hashconfig->opts_type & OPTS_TYPE_ST_HEX)
+  // 1 to 4 TOTP codes
+  // 597056:3600
+  // 597056:3600:613004:1234567890
+  // 597056:3600:613004:1234567890:322664:9876543210
+  // 597056:3600:613004:1234567890:322664:9876543210:068798:111222333
+  int count;
+  for (count = 8; count > 0; count -= 2)
   {
-    token.len_min[1] *= 2;
-    token.len_max[1] *= 2;
+    memset (&token, 0, sizeof (hc_token_t));
 
-    token.attr[1] |= TOKEN_ATTR_VERIFY_DIGIT;
+    token.token_cnt = count;
+
+    for (int i = 0; i < count; i += 2)
+    {
+      token.sep[i + 0]     = hashconfig->separator;
+      token.len[i + 0]     = 6;
+      token.attr[i + 0]    = TOKEN_ATTR_FIXED_LENGTH
+                           | TOKEN_ATTR_VERIFY_DIGIT;
+
+      // 0 to 18446744073709551615
+      token.sep[i + 1]     = hashconfig->separator;
+      token.len_min[i + 1] = 1;
+      token.len_max[i + 1] = 20;
+      token.attr[i + 1]    = TOKEN_ATTR_VERIFY_LENGTH
+                           | TOKEN_ATTR_VERIFY_DIGIT;
+
+      if (hashconfig->opts_type & OPTS_TYPE_ST_HEX)
+      {
+        token.len_min[i + 1] *= 2;
+        token.len_max[i + 1] *= 2;
+        token.attr[i + 1]     = TOKEN_ATTR_VERIFY_LENGTH
+                              | TOKEN_ATTR_VERIFY_HEX;
+      }
+    }
+
+    const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token);
+
+    if (rc_tokenizer == PARSER_OK) break;
+
+    // failed all tokenizers
+    if (count == 2) return (rc_tokenizer);
+  }
+  count /= 2;
+
+  for (int i = 0; i < count; i += 1)
+  {
+    // now we need to reduce our hash into a token
+    int otp_code = hc_strtoul ((const char *) token.buf[2 * i + 0], NULL, 10);
+
+    digest[i] = otp_code;
+
+    const u8 *salt_pos = token.buf[2 * i + 1];
+
+    // convert ascii timestamp to ulong timestamp
+    u64 timestamp = hc_strtoull ((const char *) salt_pos, NULL, 10);
+
+    // store the original salt value. Step division will destroy granularity for output
+    salt->salt_buf[4 * i + 3] = ((u32) (timestamp >>  0));
+    salt->salt_buf[4 * i + 2] = ((u32) (timestamp >> 32));
+
+    // divide our timestamp by our step. We will use the RFC 6238 default of 30 for now
+    timestamp /= 30;
+
+    // convert counter to 8-byte salt
+    salt->salt_buf[4 * i + 1] = byte_swap_32 ((u32) (timestamp >>  0));
+    salt->salt_buf[4 * i + 0] = byte_swap_32 ((u32) (timestamp >> 32));
   }
 
-  const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token);
+  // verify unique salts
+  for (int i = 0; i < count; i += 1)
+  {
+    u32 s0 = salt->salt_buf[4 * i + 0];
+    u32 s1 = salt->salt_buf[4 * i + 1];
 
-  if (rc_tokenizer != PARSER_OK) return (rc_tokenizer);
-
-  // now we need to reduce our hash into a token
-  int otp_code = hc_strtoul (line_buf, NULL, 10);
-
-  digest[0] = otp_code;
-
-  const u8 *salt_pos = token.buf[1];
-
-  // convert ascii timestamp to ulong timestamp
-  u64 timestamp = hc_strtoull ((const char *) salt_pos, NULL, 10);
-
-  // store the original salt value. Step division will destroy granularity for output
-  salt->salt_buf[3] = ((u32) (timestamp >>  0));
-  salt->salt_buf[2] = ((u32) (timestamp >> 32));
-
-  // divide our timestamp by our step. We will use the RFC 6238 default of 30 for now
-  timestamp /= 30;
-
-  // convert counter to 8-byte salt
-  salt->salt_buf[1] = byte_swap_32 ((u32) (timestamp >>  0));
-  salt->salt_buf[0] = byte_swap_32 ((u32) (timestamp >> 32));
+    for (int j = i + 1; j < count; j += 1)
+    {
+      if (salt->salt_buf[4 * j + 0] == s0 &&
+          salt->salt_buf[4 * j + 1] == s1)
+      {
+        return (PARSER_SALT_VALUE);
+      }
+    }
+  }
 
   // our salt will always be 8 bytes, but we are going to cheat and store it twice, so...
-  salt->salt_len = 16;
+  salt->salt_len = 16 * count;
 
   return (PARSER_OK);
 }
@@ -113,13 +151,37 @@ int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
 {
   const u32 *digest = (const u32 *) digest_buf;
 
-  // salt_buf[1] holds our 32 bit value. salt_buf[0] and salt_buf[1] would be 64 bits.
+  // salt_buf[4 * i + 1] holds our 32 bit value. salt_buf[4 * i + 0] and salt_buf[4 * i + 1] would be 64 bits.
   // we also need to multiply salt by our step to see the floor of our original timestamp range.
   // again, we will use the default RFC 6238 step of 30.
 
-  const u64 tmp_salt_buf = (((u64) (salt->salt_buf[2])) << 32) | ((u64) (salt->salt_buf[3]));
+  int count = salt->salt_len / 16;
 
-  const int line_len = snprintf (line_buf, line_size, "%06d%c%" PRIu64, digest[0], hashconfig->separator, tmp_salt_buf);
+  // all but the last TOTP code
+  int i = 0, line_len = 0;
+  for (; i < count - 1; i += 1)
+  {
+    const u64 tmp_salt_buf = (((u64) (salt->salt_buf[4 * i + 2])) << 32) | ((u64) (salt->salt_buf[4 * i + 3]));
+    const int ret = snprintf (line_buf + line_len, line_size - line_len, "%06d%c%" PRIu64 "%c", digest[i], hashconfig->separator, tmp_salt_buf, hashconfig->separator);
+    line_len += ret;
+
+    // error
+    if (ret < 0)
+    {
+      return ret;
+    }
+  }
+
+  // the last TOTP code
+  const u64 tmp_salt_buf = (((u64) (salt->salt_buf[4 * i + 2])) << 32) | ((u64) (salt->salt_buf[4 * i + 3]));
+  const int ret = snprintf (line_buf + line_len, line_size - line_len, "%06d%c%" PRIu64, digest[i], hashconfig->separator, tmp_salt_buf);
+  line_len += ret;
+
+  // error
+  if (ret < 0)
+  {
+    return ret;
+  }
 
   return line_len;
 }

From 08514edd22c10d21ba329317b0985753a695396f Mon Sep 17 00:00:00 2001
From: unix-ninja <chris@unix-ninja.com>
Date: Sat, 15 Feb 2025 22:29:38 -0500
Subject: [PATCH 14/83] Ignore .DS_Store files.

---
 .gitignore | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.gitignore b/.gitignore
index d8ff600bb..f7d1967e3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
+.DS_Store
+*/.DS_Store
 *.exe
 *.bin
 *.app

From faa680fbab803723d77449b7107c1c985a6b7981 Mon Sep 17 00:00:00 2001
From: unix-ninja <chris@unix-ninja.com>
Date: Sat, 15 Feb 2025 22:30:05 -0500
Subject: [PATCH 15/83] Add gitea2hashcat.py

---
 tools/gitea2hashcat.py | 75 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)
 create mode 100755 tools/gitea2hashcat.py

diff --git a/tools/gitea2hashcat.py b/tools/gitea2hashcat.py
new file mode 100755
index 000000000..ac1c539d2
--- /dev/null
+++ b/tools/gitea2hashcat.py
@@ -0,0 +1,75 @@
+#!/usr/bin/python3
+# Converts gitea PBKDF2-HMAC-SHA256 hashes into a format hashcat can use
+# written by unix-ninja
+
+import argparse
+import base64
+import sys
+
+def convert_hash(hash_string):
+    """Converts a hex SALT+HASH string to a hashcat compatible format,
+       ensuring the smaller input is treated as the salt.
+       Use : or | as delimiters. Returns None (and prints why) on bad input.
+    """
+    hash_string = hash_string.replace('|', ':')
+    try:
+        part1, part2 = hash_string.split(":")
+    except ValueError:
+        print(f"[-] Invalid input format: {hash_string}")
+        return None
+
+    try:
+        bytes1 = bytes.fromhex(part1)
+        bytes2 = bytes.fromhex(part2)
+    except ValueError:
+      print(f"[-] Invalid hex input: {hash_string}")
+      return None
+
+    # If lengths are equal, we will maintain the original order
+    if len(bytes1) > len(bytes2):
+        salt_bytes = bytes2
+        hash_bytes = bytes1
+    else:  
+        salt_bytes = bytes1
+        hash_bytes = bytes2
+
+    # Re-encode both fields from raw bytes to base64 for the output line
+    salt_b64 = base64.b64encode(salt_bytes).decode('utf-8')
+    hash_b64 = base64.b64encode(hash_bytes).decode('utf-8')
+    # NOTE(review): the iteration count is fixed at 50000 - confirm it matches the Gitea instance the hashes came from
+    return f"sha256:50000:{salt_b64}:{hash_b64}"
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Convert Gitea SALT+HASH strings to a hashcat-compatible format.",
+        formatter_class=argparse.RawTextHelpFormatter,
+        epilog="""Example:
+    gitea2hashcat.py <salt1>:<hash1> <hash2>|<salt2> ... or pipe input from stdin.
+        
+    You can also dump output straight from sqlite into this script:
+        sqlite3 gitea.db 'select salt,passwd from user;' | gitea2hashcat.py""")
+    parser.add_argument('hashes', nargs='*', help='SALT+HASH strings to convert')
+    args = parser.parse_args()
+
+    # Print a banner naming the hashcat mode, followed by an empty line, before any converted hash
+    print("[+] Run the output hashes through hashcat mode 10900 (PBKDF2-HMAC-SHA256)")
+    print()
+
+    if args.hashes:
+        # Process command-line arguments; inputs that fail to convert are skipped
+        for hash_string in args.hashes:
+            converted_hash = convert_hash(hash_string)
+            if converted_hash:
+                print(converted_hash)
+
+    else:
+        # No arguments given: process input from stdin, one SALT+HASH string per line
+        for line in sys.stdin:
+            hash_string = line.strip()  # Remove leading/trailing whitespace
+            converted_hash = convert_hash(hash_string)
+            if converted_hash:
+                print(converted_hash)
+
+if __name__ == "__main__":
+    main()

From faecf1e034e9ebdaa27b131a05f8155c613c60e1 Mon Sep 17 00:00:00 2001
From: luke <92046606+dunghm19@users.noreply.github.com>
Date: Wed, 23 Apr 2025 08:48:01 +0700
Subject: [PATCH 16/83] Added hash-mode: md5($salt.md5($pass).$salt)

---
 OpenCL/m32710_a0-pure.cl     | 277 ++++++++++++++++++++++++++++++++
 OpenCL/m32710_a1-pure.cl     | 271 ++++++++++++++++++++++++++++++++
 OpenCL/m32710_a3-pure.cl     | 297 +++++++++++++++++++++++++++++++++++
 docs/changes.txt             |   3 +-
 docs/readme.txt              |   1 +
 src/modules/module_32710.c   | 222 ++++++++++++++++++++++++++
 tools/test_modules/m32710.pm |  44 ++++++
 7 files changed, 1114 insertions(+), 1 deletion(-)
 create mode 100644 OpenCL/m32710_a0-pure.cl
 create mode 100644 OpenCL/m32710_a1-pure.cl
 create mode 100644 OpenCL/m32710_a3-pure.cl
 create mode 100644 src/modules/module_32710.c
 create mode 100644 tools/test_modules/m32710.pm

diff --git a/OpenCL/m32710_a0-pure.cl b/OpenCL/m32710_a0-pure.cl
new file mode 100644
index 000000000..0a9680da4
--- /dev/null
+++ b/OpenCL/m32710_a0-pure.cl
@@ -0,0 +1,277 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+//#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include M2S(INCLUDE_PATH/inc_vendor.h)
+#include M2S(INCLUDE_PATH/inc_types.h)
+#include M2S(INCLUDE_PATH/inc_platform.cl)
+#include M2S(INCLUDE_PATH/inc_common.cl)
+#include M2S(INCLUDE_PATH/inc_rp.h)
+#include M2S(INCLUDE_PATH/inc_rp.cl)
+#include M2S(INCLUDE_PATH/inc_scalar.cl)
+#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
+#endif
+
+#if   VECT_SIZE == 1
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
+#elif VECT_SIZE == 2
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#elif VECT_SIZE == 4
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#elif VECT_SIZE == 8
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#elif VECT_SIZE == 16
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#endif
+
+KERNEL_FQ void m32710_mxx (KERN_ATTR_RULES ())
+{
+  /**
+   * modifier: work-item ids and work-group size
+   */
+
+  const u64 gid = get_global_id (0);
+  const u64 lid = get_local_id (0);
+  const u64 lsz = get_local_size (0);
+
+  /**
+   * bin2asc table: l_bin2asc[b] holds the two lowercase hex characters of byte b, high nibble in the low byte
+   */
+
+  LOCAL_VK u32 l_bin2asc[256];
+
+  for (u32 i = lid; i < 256; i += lsz)
+  {
+    const u32 i0 = (i >> 0) & 15;
+    const u32 i1 = (i >> 4) & 15;
+
+    l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
+                 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
+  }
+
+  SYNC_THREADS ();
+  // checked only after the table fill and SYNC_THREADS, so out-of-range work-items still take part in both
+  if (gid >= GID_CNT) return;
+
+  /**
+   * base: copy the base password (COPY_PW) and the salt words into s[]
+   */
+
+  COPY_PW (pws[gid]);
+
+  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
+
+  u32 s[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
+  {
+    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
+  }
+
+  /**
+   * loop: one iteration per rules_buf entry
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  {
+    pw_t tmp = PASTE_PW;
+
+    tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
+    // inner hash: md5 of the candidate produced by apply_rules
+    md5_ctx_t ctx0;
+
+    md5_init (&ctx0);
+
+    md5_update (&ctx0, tmp.i, tmp.pw_len);
+
+    md5_final (&ctx0);
+
+    const u32 a = ctx0.h[0];
+    const u32 b = ctx0.h[1];
+    const u32 c = ctx0.h[2];
+    const u32 d = ctx0.h[3];
+    // outer hash: md5 over salt, the hex form of the inner digest, then the salt again
+    md5_ctx_t ctx;
+
+    md5_init (&ctx);
+
+    md5_update (&ctx, s, salt_len);
+
+    u32 w0[4];
+    u32 w1[4];
+    u32 w2[4];
+    u32 w3[4];
+    // expand each inner digest byte to two ASCII hex characters via l_bin2asc: a, b fill w0 and c, d fill w1
+    w0[0] = uint_to_hex_lower8 ((a >>  0) & 255) <<  0
+          | uint_to_hex_lower8 ((a >>  8) & 255) << 16;
+    w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) <<  0
+          | uint_to_hex_lower8 ((a >> 24) & 255) << 16;
+    w0[2] = uint_to_hex_lower8 ((b >>  0) & 255) <<  0
+          | uint_to_hex_lower8 ((b >>  8) & 255) << 16;
+    w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) <<  0
+          | uint_to_hex_lower8 ((b >> 24) & 255) << 16;
+    w1[0] = uint_to_hex_lower8 ((c >>  0) & 255) <<  0
+          | uint_to_hex_lower8 ((c >>  8) & 255) << 16;
+    w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) <<  0
+          | uint_to_hex_lower8 ((c >> 24) & 255) << 16;
+    w1[2] = uint_to_hex_lower8 ((d >>  0) & 255) <<  0
+          | uint_to_hex_lower8 ((d >>  8) & 255) << 16;
+    w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) <<  0
+          | uint_to_hex_lower8 ((d >> 24) & 255) << 16;
+    w2[0] = 0;
+    w2[1] = 0;
+    w2[2] = 0;
+    w2[3] = 0;
+    w3[0] = 0;
+    w3[1] = 0;
+    w3[2] = 0;
+    w3[3] = 0;
+    // only the 32 hex characters in w0/w1 are appended; w2/w3 are zero
+    md5_update_64 (&ctx, w0, w1, w2, w3, 32);
+
+    md5_update (&ctx, s, salt_len);
+
+    md5_final (&ctx);
+
+    const u32 r0 = ctx.h[DGST_R0];
+    const u32 r1 = ctx.h[DGST_R1];
+    const u32 r2 = ctx.h[DGST_R2];
+    const u32 r3 = ctx.h[DGST_R3];
+
+    COMPARE_M_SCALAR (r0, r1, r2, r3);
+  }
+}
+
+KERNEL_FQ void m32710_sxx (KERN_ATTR_RULES ())
+{
+  /**
+   * modifier: work-item ids and work-group size
+   */
+
+  const u64 gid = get_global_id (0);
+  const u64 lid = get_local_id (0);
+  const u64 lsz = get_local_size (0);
+
+  /**
+   * bin2asc table: l_bin2asc[b] holds the two lowercase hex characters of byte b, high nibble in the low byte
+   */
+
+  LOCAL_VK u32 l_bin2asc[256];
+
+  for (u32 i = lid; i < 256; i += lsz)
+  {
+    const u32 i0 = (i >> 0) & 15;
+    const u32 i1 = (i >> 4) & 15;
+
+    l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
+                 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
+  }
+
+  SYNC_THREADS ();
+  // checked only after the table fill and SYNC_THREADS, so out-of-range work-items still take part in both
+  if (gid >= GID_CNT) return;
+
+  /**
+   * digest: the four digest words at DIGESTS_OFFSET_HOST in DGST_R0..DGST_R3 order (COMPARE_S_SCALAR presumably reads search[])
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
+  };
+
+  /**
+   * base: copy the base password (COPY_PW) and the salt words into s[]
+   */
+
+  COPY_PW (pws[gid]);
+
+  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
+
+  u32 s[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
+  {
+    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
+  }
+
+  /**
+   * loop: one iteration per rules_buf entry
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  {
+    pw_t tmp = PASTE_PW;
+
+    tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
+    // inner hash: md5 of the candidate produced by apply_rules
+    md5_ctx_t ctx0;
+
+    md5_init (&ctx0);
+
+    md5_update (&ctx0, tmp.i, tmp.pw_len);
+
+    md5_final (&ctx0);
+
+    const u32 a = ctx0.h[0];
+    const u32 b = ctx0.h[1];
+    const u32 c = ctx0.h[2];
+    const u32 d = ctx0.h[3];
+    // outer hash: md5 over salt, the hex form of the inner digest, then the salt again
+    md5_ctx_t ctx;
+
+    md5_init (&ctx);
+
+    md5_update (&ctx, s, salt_len);
+
+    u32 w0[4];
+    u32 w1[4];
+    u32 w2[4];
+    u32 w3[4];
+    // expand each inner digest byte to two ASCII hex characters via l_bin2asc: a, b fill w0 and c, d fill w1
+    w0[0] = uint_to_hex_lower8 ((a >>  0) & 255) <<  0
+          | uint_to_hex_lower8 ((a >>  8) & 255) << 16;
+    w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) <<  0
+          | uint_to_hex_lower8 ((a >> 24) & 255) << 16;
+    w0[2] = uint_to_hex_lower8 ((b >>  0) & 255) <<  0
+          | uint_to_hex_lower8 ((b >>  8) & 255) << 16;
+    w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) <<  0
+          | uint_to_hex_lower8 ((b >> 24) & 255) << 16;
+    w1[0] = uint_to_hex_lower8 ((c >>  0) & 255) <<  0
+          | uint_to_hex_lower8 ((c >>  8) & 255) << 16;
+    w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) <<  0
+          | uint_to_hex_lower8 ((c >> 24) & 255) << 16;
+    w1[2] = uint_to_hex_lower8 ((d >>  0) & 255) <<  0
+          | uint_to_hex_lower8 ((d >>  8) & 255) << 16;
+    w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) <<  0
+          | uint_to_hex_lower8 ((d >> 24) & 255) << 16;
+    w2[0] = 0;
+    w2[1] = 0;
+    w2[2] = 0;
+    w2[3] = 0;
+    w3[0] = 0;
+    w3[1] = 0;
+    w3[2] = 0;
+    w3[3] = 0;
+    // only the 32 hex characters in w0/w1 are appended; w2/w3 are zero
+    md5_update_64 (&ctx, w0, w1, w2, w3, 32);
+
+    md5_update (&ctx, s, salt_len);
+
+    md5_final (&ctx);
+
+    const u32 r0 = ctx.h[DGST_R0];
+    const u32 r1 = ctx.h[DGST_R1];
+    const u32 r2 = ctx.h[DGST_R2];
+    const u32 r3 = ctx.h[DGST_R3];
+
+    COMPARE_S_SCALAR (r0, r1, r2, r3);
+  }
+}
diff --git a/OpenCL/m32710_a1-pure.cl b/OpenCL/m32710_a1-pure.cl
new file mode 100644
index 000000000..cb21ffd00
--- /dev/null
+++ b/OpenCL/m32710_a1-pure.cl
@@ -0,0 +1,271 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+//#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include M2S(INCLUDE_PATH/inc_vendor.h)
+#include M2S(INCLUDE_PATH/inc_types.h)
+#include M2S(INCLUDE_PATH/inc_platform.cl)
+#include M2S(INCLUDE_PATH/inc_common.cl)
+#include M2S(INCLUDE_PATH/inc_scalar.cl)
+#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
+#endif
+
+#if   VECT_SIZE == 1
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
+#elif VECT_SIZE == 2
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#elif VECT_SIZE == 4
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#elif VECT_SIZE == 8
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#elif VECT_SIZE == 16
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#endif
+
+KERNEL_FQ void m32710_mxx (KERN_ATTR_BASIC ())
+{
+  /**
+   * modifier: work-item ids and work-group size
+   */
+
+  const u64 gid = get_global_id (0);
+  const u64 lid = get_local_id (0);
+  const u64 lsz = get_local_size (0);
+
+  /**
+   * bin2asc table: l_bin2asc[b] holds the two lowercase hex characters of byte b, high nibble in the low byte
+   */
+
+  LOCAL_VK u32 l_bin2asc[256];
+
+  for (u32 i = lid; i < 256; i += lsz)
+  {
+    const u32 i0 = (i >> 0) & 15;
+    const u32 i1 = (i >> 4) & 15;
+
+    l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
+                 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
+  }
+
+  SYNC_THREADS ();
+  // checked only after the table fill and SYNC_THREADS, so out-of-range work-items still take part in both
+  if (gid >= GID_CNT) return;
+
+  /**
+   * base: copy the salt words into s[] and start the inner md5 with the pws[gid] word
+   */
+
+  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
+
+  u32 s[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
+  {
+    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
+  }
+  // ctx0 absorbs pws[gid] once; every loop iteration continues from a copy of it
+  md5_ctx_t ctx0;
+
+  md5_init (&ctx0);
+
+  md5_update_global (&ctx0, pws[gid].i, pws[gid].pw_len);
+
+  /**
+   * loop: one iteration per combs_buf entry
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  {
+    md5_ctx_t ctx1 = ctx0;
+
+    md5_update_global (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
+
+    md5_final (&ctx1);
+
+    const u32 a = ctx1.h[0];
+    const u32 b = ctx1.h[1];
+    const u32 c = ctx1.h[2];
+    const u32 d = ctx1.h[3];
+    // outer hash: md5 over salt, the hex form of the inner digest, then the salt again
+    md5_ctx_t ctx;
+
+    md5_init (&ctx);
+
+    md5_update (&ctx, s, salt_len);
+
+    u32 w0[4];
+    u32 w1[4];
+    u32 w2[4];
+    u32 w3[4];
+    // expand each inner digest byte to two ASCII hex characters via l_bin2asc: a, b fill w0 and c, d fill w1
+    w0[0] = uint_to_hex_lower8 ((a >>  0) & 255) <<  0
+          | uint_to_hex_lower8 ((a >>  8) & 255) << 16;
+    w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) <<  0
+          | uint_to_hex_lower8 ((a >> 24) & 255) << 16;
+    w0[2] = uint_to_hex_lower8 ((b >>  0) & 255) <<  0
+          | uint_to_hex_lower8 ((b >>  8) & 255) << 16;
+    w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) <<  0
+          | uint_to_hex_lower8 ((b >> 24) & 255) << 16;
+    w1[0] = uint_to_hex_lower8 ((c >>  0) & 255) <<  0
+          | uint_to_hex_lower8 ((c >>  8) & 255) << 16;
+    w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) <<  0
+          | uint_to_hex_lower8 ((c >> 24) & 255) << 16;
+    w1[2] = uint_to_hex_lower8 ((d >>  0) & 255) <<  0
+          | uint_to_hex_lower8 ((d >>  8) & 255) << 16;
+    w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) <<  0
+          | uint_to_hex_lower8 ((d >> 24) & 255) << 16;
+    w2[0] = 0;
+    w2[1] = 0;
+    w2[2] = 0;
+    w2[3] = 0;
+    w3[0] = 0;
+    w3[1] = 0;
+    w3[2] = 0;
+    w3[3] = 0;
+    // only the 32 hex characters in w0/w1 are appended; w2/w3 are zero
+    md5_update_64 (&ctx, w0, w1, w2, w3, 32);
+
+    md5_update (&ctx, s, salt_len);
+
+    md5_final (&ctx);
+
+    const u32 r0 = ctx.h[DGST_R0];
+    const u32 r1 = ctx.h[DGST_R1];
+    const u32 r2 = ctx.h[DGST_R2];
+    const u32 r3 = ctx.h[DGST_R3];
+
+    COMPARE_M_SCALAR (r0, r1, r2, r3);
+  }
+}
+
+KERNEL_FQ void m32710_sxx (KERN_ATTR_BASIC ())
+{
+  /**
+   * modifier: work-item ids and work-group size
+   */
+
+  const u64 gid = get_global_id (0);
+  const u64 lid = get_local_id (0);
+  const u64 lsz = get_local_size (0);
+
+  /**
+   * bin2asc table: l_bin2asc[b] holds the two lowercase hex characters of byte b, high nibble in the low byte
+   */
+
+  LOCAL_VK u32 l_bin2asc[256];
+
+  for (u32 i = lid; i < 256; i += lsz)
+  {
+    const u32 i0 = (i >> 0) & 15;
+    const u32 i1 = (i >> 4) & 15;
+
+    l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
+                 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
+  }
+
+  SYNC_THREADS ();
+  // checked only after the table fill and SYNC_THREADS, so out-of-range work-items still take part in both
+  if (gid >= GID_CNT) return;
+
+  /**
+   * digest: the four digest words at DIGESTS_OFFSET_HOST in DGST_R0..DGST_R3 order (COMPARE_S_SCALAR presumably reads search[])
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
+  };
+
+  /**
+   * base: copy the salt words into s[] and start the inner md5 with the pws[gid] word
+   */
+
+  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
+
+  u32 s[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
+  {
+    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
+  }
+  // ctx0 absorbs pws[gid] once; every loop iteration continues from a copy of it
+  md5_ctx_t ctx0;
+
+  md5_init (&ctx0);
+
+  md5_update_global (&ctx0, pws[gid].i, pws[gid].pw_len);
+
+  /**
+   * loop: one iteration per combs_buf entry
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  {
+    md5_ctx_t ctx1 = ctx0;
+
+    md5_update_global (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
+
+    md5_final (&ctx1);
+
+    const u32 a = ctx1.h[0];
+    const u32 b = ctx1.h[1];
+    const u32 c = ctx1.h[2];
+    const u32 d = ctx1.h[3];
+    // outer hash: md5 over salt, the hex form of the inner digest, then the salt again
+    md5_ctx_t ctx;
+
+    md5_init (&ctx);
+
+    md5_update (&ctx, s, salt_len);
+
+    u32 w0[4];
+    u32 w1[4];
+    u32 w2[4];
+    u32 w3[4];
+    // expand each inner digest byte to two ASCII hex characters via l_bin2asc: a, b fill w0 and c, d fill w1
+    w0[0] = uint_to_hex_lower8 ((a >>  0) & 255) <<  0
+          | uint_to_hex_lower8 ((a >>  8) & 255) << 16;
+    w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) <<  0
+          | uint_to_hex_lower8 ((a >> 24) & 255) << 16;
+    w0[2] = uint_to_hex_lower8 ((b >>  0) & 255) <<  0
+          | uint_to_hex_lower8 ((b >>  8) & 255) << 16;
+    w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) <<  0
+          | uint_to_hex_lower8 ((b >> 24) & 255) << 16;
+    w1[0] = uint_to_hex_lower8 ((c >>  0) & 255) <<  0
+          | uint_to_hex_lower8 ((c >>  8) & 255) << 16;
+    w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) <<  0
+          | uint_to_hex_lower8 ((c >> 24) & 255) << 16;
+    w1[2] = uint_to_hex_lower8 ((d >>  0) & 255) <<  0
+          | uint_to_hex_lower8 ((d >>  8) & 255) << 16;
+    w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) <<  0
+          | uint_to_hex_lower8 ((d >> 24) & 255) << 16;
+    w2[0] = 0;
+    w2[1] = 0;
+    w2[2] = 0;
+    w2[3] = 0;
+    w3[0] = 0;
+    w3[1] = 0;
+    w3[2] = 0;
+    w3[3] = 0;
+    // only the 32 hex characters in w0/w1 are appended; w2/w3 are zero
+    md5_update_64 (&ctx, w0, w1, w2, w3, 32);
+
+    md5_update (&ctx, s, salt_len);
+
+    md5_final (&ctx);
+
+    const u32 r0 = ctx.h[DGST_R0];
+    const u32 r1 = ctx.h[DGST_R1];
+    const u32 r2 = ctx.h[DGST_R2];
+    const u32 r3 = ctx.h[DGST_R3];
+
+    COMPARE_S_SCALAR (r0, r1, r2, r3);
+  }
+}
diff --git a/OpenCL/m32710_a3-pure.cl b/OpenCL/m32710_a3-pure.cl
new file mode 100644
index 000000000..069ae70f5
--- /dev/null
+++ b/OpenCL/m32710_a3-pure.cl
@@ -0,0 +1,297 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include M2S(INCLUDE_PATH/inc_vendor.h)
+#include M2S(INCLUDE_PATH/inc_types.h)
+#include M2S(INCLUDE_PATH/inc_platform.cl)
+#include M2S(INCLUDE_PATH/inc_common.cl)
+#include M2S(INCLUDE_PATH/inc_simd.cl)
+#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
+#endif
+
+#if   VECT_SIZE == 1 // uint_to_hex_lower8(i): look up l_bin2asc for every lane of i, one variant per vector width
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
+#elif VECT_SIZE == 2
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
+#elif VECT_SIZE == 4
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
+#elif VECT_SIZE == 8
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
+#elif VECT_SIZE == 16
+#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
+#endif // VECT_SIZE
+
+KERNEL_FQ void m32710_mxx (KERN_ATTR_VECTOR ()) // md5 ($salt . hex (md5 ($pass)) . $salt), result handed to COMPARE_M_SIMD
+{
+  /**
+   * modifier: work-item indices (global id, local id, local size)
+   */
+
+  const u64 gid = get_global_id (0);
+  const u64 lid = get_local_id (0);
+  const u64 lsz = get_local_size (0);
+
+  /**
+   * bin2asc table: byte value -> its two lowercase hex characters
+   */
+
+  LOCAL_VK u32 l_bin2asc[256];
+
+  for (u32 i = lid; i < 256; i += lsz) // each work-item fills entries lid, lid + lsz, ...
+  {
+    const u32 i0 = (i >> 0) & 15; // low nibble
+    const u32 i1 = (i >> 4) & 15; // high nibble
+
+    l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 // low nibble character goes to bits 8..15
+                 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; // high nibble character goes to bits 0..7
+  }
+
+  SYNC_THREADS (); // placed before the early return below, so out-of-range work-items still fill their entries
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * base: copy password and salt into zero-initialized word arrays
+   */
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  u32x w[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) // one 32-bit word per 4 password bytes
+  {
+    w[idx] = pws[gid].i[idx];
+  }
+
+  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
+
+  u32x s[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) // one 32-bit word per 4 salt bytes
+  {
+    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
+  }
+
+  /**
+   * loop: il_pos advances by VECT_SIZE per pass
+   */
+
+  u32x w0l = w[0];
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+  {
+    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+
+    const u32x w0lr = w0l | w0r; // OR the per-pass value from words_buf_r into the first password word
+
+    w[0] = w0lr;
+
+    md5_ctx_vector_t ctx0; // inner hash: md5 ($pass)
+
+    md5_init_vector (&ctx0);
+
+    md5_update_vector (&ctx0, w, pw_len);
+
+    md5_final_vector (&ctx0);
+
+    const u32x a = ctx0.h[0];
+    const u32x b = ctx0.h[1];
+    const u32x c = ctx0.h[2];
+    const u32x d = ctx0.h[3];
+
+    md5_ctx_vector_t ctx; // outer hash: $salt, then the hex text of the inner digest, then $salt again
+
+    md5_init_vector (&ctx);
+
+    md5_update_vector (&ctx, s, salt_len);
+
+    u32x w0[4];
+    u32x w1[4];
+    u32x w2[4];
+    u32x w3[4];
+
+    w0[0] = uint_to_hex_lower8 ((a >>  0) & 255) <<  0 // each word packs the hex text of two digest bytes (4 characters)
+          | uint_to_hex_lower8 ((a >>  8) & 255) << 16;
+    w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) <<  0
+          | uint_to_hex_lower8 ((a >> 24) & 255) << 16;
+    w0[2] = uint_to_hex_lower8 ((b >>  0) & 255) <<  0
+          | uint_to_hex_lower8 ((b >>  8) & 255) << 16;
+    w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) <<  0
+          | uint_to_hex_lower8 ((b >> 24) & 255) << 16;
+    w1[0] = uint_to_hex_lower8 ((c >>  0) & 255) <<  0
+          | uint_to_hex_lower8 ((c >>  8) & 255) << 16;
+    w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) <<  0
+          | uint_to_hex_lower8 ((c >> 24) & 255) << 16;
+    w1[2] = uint_to_hex_lower8 ((d >>  0) & 255) <<  0
+          | uint_to_hex_lower8 ((d >>  8) & 255) << 16;
+    w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) <<  0
+          | uint_to_hex_lower8 ((d >> 24) & 255) << 16;
+    w2[0] = 0; // remaining words of the 64-byte block stay zero
+    w2[1] = 0;
+    w2[2] = 0;
+    w2[3] = 0;
+    w3[0] = 0;
+    w3[1] = 0;
+    w3[2] = 0;
+    w3[3] = 0;
+
+    md5_update_vector_64 (&ctx, w0, w1, w2, w3, 32); // 32 = length of the hex-encoded inner digest
+
+    md5_update_vector (&ctx, s, salt_len);
+
+    md5_final_vector (&ctx);
+
+    const u32x r0 = ctx.h[DGST_R0];
+    const u32x r1 = ctx.h[DGST_R1];
+    const u32x r2 = ctx.h[DGST_R2];
+    const u32x r3 = ctx.h[DGST_R3];
+
+    COMPARE_M_SIMD (r0, r1, r2, r3);
+  }
+}
+
+KERNEL_FQ void m32710_sxx (KERN_ATTR_VECTOR ()) // md5 ($salt . hex (md5 ($pass)) . $salt), result handed to COMPARE_S_SIMD
+{
+  /**
+   * modifier: work-item indices (global id, local id, local size)
+   */
+
+  const u64 gid = get_global_id (0);
+  const u64 lid = get_local_id (0);
+  const u64 lsz = get_local_size (0);
+
+  /**
+   * bin2asc table: byte value -> its two lowercase hex characters
+   */
+
+  LOCAL_VK u32 l_bin2asc[256];
+
+  for (u32 i = lid; i < 256; i += lsz) // each work-item fills entries lid, lid + lsz, ...
+  {
+    const u32 i0 = (i >> 0) & 15; // low nibble
+    const u32 i1 = (i >> 4) & 15; // high nibble
+
+    l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 // low nibble character goes to bits 8..15
+                 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0; // high nibble character goes to bits 0..7
+  }
+
+  SYNC_THREADS (); // placed before the early return below, so out-of-range work-items still fill their entries
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * digest: the four target words at DIGESTS_OFFSET_HOST (not referenced again in this body; presumably read by COMPARE_S_SIMD)
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
+  };
+
+  /**
+   * base: copy password and salt into zero-initialized word arrays
+   */
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  u32x w[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) // one 32-bit word per 4 password bytes
+  {
+    w[idx] = pws[gid].i[idx];
+  }
+
+  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
+
+  u32x s[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1) // one 32-bit word per 4 salt bytes
+  {
+    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
+  }
+
+  /**
+   * loop: il_pos advances by VECT_SIZE per pass
+   */
+
+  u32x w0l = w[0];
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+  {
+    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+
+    const u32x w0lr = w0l | w0r; // OR the per-pass value from words_buf_r into the first password word
+
+    w[0] = w0lr;
+
+    md5_ctx_vector_t ctx0; // inner hash: md5 ($pass)
+
+    md5_init_vector (&ctx0);
+
+    md5_update_vector (&ctx0, w, pw_len);
+
+    md5_final_vector (&ctx0);
+
+    const u32x a = ctx0.h[0];
+    const u32x b = ctx0.h[1];
+    const u32x c = ctx0.h[2];
+    const u32x d = ctx0.h[3];
+
+    md5_ctx_vector_t ctx; // outer hash: $salt, then the hex text of the inner digest, then $salt again
+
+    md5_init_vector (&ctx);
+
+    md5_update_vector (&ctx, s, salt_len);
+
+    u32x w0[4];
+    u32x w1[4];
+    u32x w2[4];
+    u32x w3[4];
+
+    w0[0] = uint_to_hex_lower8 ((a >>  0) & 255) <<  0 // each word packs the hex text of two digest bytes (4 characters)
+          | uint_to_hex_lower8 ((a >>  8) & 255) << 16;
+    w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) <<  0
+          | uint_to_hex_lower8 ((a >> 24) & 255) << 16;
+    w0[2] = uint_to_hex_lower8 ((b >>  0) & 255) <<  0
+          | uint_to_hex_lower8 ((b >>  8) & 255) << 16;
+    w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) <<  0
+          | uint_to_hex_lower8 ((b >> 24) & 255) << 16;
+    w1[0] = uint_to_hex_lower8 ((c >>  0) & 255) <<  0
+          | uint_to_hex_lower8 ((c >>  8) & 255) << 16;
+    w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) <<  0
+          | uint_to_hex_lower8 ((c >> 24) & 255) << 16;
+    w1[2] = uint_to_hex_lower8 ((d >>  0) & 255) <<  0
+          | uint_to_hex_lower8 ((d >>  8) & 255) << 16;
+    w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) <<  0
+          | uint_to_hex_lower8 ((d >> 24) & 255) << 16;
+    w2[0] = 0; // remaining words of the 64-byte block stay zero
+    w2[1] = 0;
+    w2[2] = 0;
+    w2[3] = 0;
+    w3[0] = 0;
+    w3[1] = 0;
+    w3[2] = 0;
+    w3[3] = 0;
+
+    md5_update_vector_64 (&ctx, w0, w1, w2, w3, 32); // 32 = length of the hex-encoded inner digest
+
+    md5_update_vector (&ctx, s, salt_len);
+
+    md5_final_vector (&ctx);
+
+    const u32x r0 = ctx.h[DGST_R0];
+    const u32x r1 = ctx.h[DGST_R1];
+    const u32x r2 = ctx.h[DGST_R2];
+    const u32x r3 = ctx.h[DGST_R3];
+
+    COMPARE_S_SIMD (r0, r1, r2, r3);
+  }
+}
diff --git a/docs/changes.txt b/docs/changes.txt
index 283e3c0d4..eb51b703b 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -49,12 +49,13 @@
 - Added hash-mode: md5(md5($salt).md5(md5($pass)))
 - Added hash-mode: Domain Cached Credentials 2 (DCC2), MS Cache 2, (NT)
 - Added hash-mode: Domain Cached Credentials (DCC), MS Cache (NT)
-- Added hash-mode: md5(md5(md5($pass).$salt1).$salt2)
+- Added hash-mode: md5(md5(md5($pass).$salt1).$salt2)
 - Added hash-mode: md5(md5(md5($pass)).$salt)
 - Added hash-mode: md5(sha1($pass.$salt))
 - Added hash-mode: md5(sha1($salt.$pass))
 - Added hash-mode: sha512(sha512($pass).$salt)
 - Added hash-mode: sha512(sha512_bin($pass).$salt)
+- Added hash-mode: md5($salt.md5($pass).$salt)
 
 ##
 ## Features
diff --git a/docs/readme.txt b/docs/readme.txt
index e5fd69f25..55ab4b308 100644
--- a/docs/readme.txt
+++ b/docs/readme.txt
@@ -91,6 +91,7 @@ NVIDIA GPUs require "NVIDIA Driver" (440.64 or later) and "CUDA Toolkit" (9.0 or
 - md5(md5($pass))
 - md5(md5($pass).md5($salt))
 - md5(md5($pass.$salt))
+- md5($salt.md5($pass).$salt)
 - md5(md5(md5($pass)))
 - md5(md5(md5($pass)).$salt)
 - md5(md5(md5($pass).$salt1).$salt2)
diff --git a/src/modules/module_32710.c b/src/modules/module_32710.c
new file mode 100644
index 000000000..27abdf155
--- /dev/null
+++ b/src/modules/module_32710.c
@@ -0,0 +1,222 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#include "common.h"
+#include "types.h"
+#include "modules.h"
+#include "bitops.h"
+#include "convert.h"
+#include "shared.h"
+
+static const u32   ATTACK_EXEC    = ATTACK_EXEC_INSIDE_KERNEL;
+static const u32   DGST_POS0      = 0;
+static const u32   DGST_POS1      = 3;
+static const u32   DGST_POS2      = 2;
+static const u32   DGST_POS3      = 1;
+static const u32   DGST_SIZE      = DGST_SIZE_4_4;
+static const u32   HASH_CATEGORY  = HASH_CATEGORY_RAW_HASH_SALTED;
+static const char *HASH_NAME      = "md5($salt.md5($pass).$salt)";
+static const u64   KERN_TYPE      = 32710;
+static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_PRECOMPUTE_INIT
+                                  | OPTI_TYPE_EARLY_SKIP;
+static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
+                                  | OPTS_TYPE_PT_GENERATE_LE
+                                  | OPTS_TYPE_PT_ADD80
+                                  | OPTS_TYPE_PT_ADDBITS14;
+static const u32   SALT_TYPE      = SALT_TYPE_GENERIC;
+static const char *ST_PASS        = "hashcat";
+static const char *ST_HASH        = "866244ca1d318292a6f40b60e03fd29c:72219426709"; // 32 hex digest, ':', salt
+
+u32         module_attack_exec   (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC; }
+u32         module_dgst_pos0     (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0; }
+u32         module_dgst_pos1     (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS1; }
+u32         module_dgst_pos2     (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS2; }
+u32         module_dgst_pos3     (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS3; }
+u32         module_dgst_size     (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_SIZE; }
+u32         module_hash_category (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_CATEGORY; }
+const char *module_hash_name     (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_NAME; }
+u64         module_kern_type     (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return KERN_TYPE; }
+u32         module_opti_type     (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTI_TYPE; }
+u64         module_opts_type     (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTS_TYPE; }
+u32         module_salt_type     (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return SALT_TYPE; }
+const char *module_st_hash       (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH; }
+const char *module_st_pass       (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS; }
+
+int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
+{
+  u32 *digest = (u32 *) digest_buf;
+
+  hc_token_t token;
+
+  memset (&token, 0, sizeof (hc_token_t));
+
+  token.token_cnt  = 2; // token 0: 32 hex digest, token 1: salt
+
+  token.sep[0]     = hashconfig->separator;
+  token.len[0]     = 32;
+  token.attr[0]    = TOKEN_ATTR_FIXED_LENGTH
+                   | TOKEN_ATTR_VERIFY_HEX;
+
+  token.len_min[1] = SALT_MIN;
+  token.len_max[1] = SALT_MAX;
+  token.attr[1]    = TOKEN_ATTR_VERIFY_LENGTH;
+
+  if (hashconfig->opts_type & OPTS_TYPE_ST_HEX)
+  {
+    token.len_min[1] *= 2; // hex-encoded salts take two characters per byte
+    token.len_max[1] *= 2;
+
+    token.attr[1] |= TOKEN_ATTR_VERIFY_HEX;
+  }
+
+  const int rc = input_tokenizer ((const u8 *) line_buf, line_len, &token);
+
+  if (rc != PARSER_OK) return (rc);
+
+  const u8 *digest_hex = token.buf[0];
+
+  digest[0] = hex_to_u32 (digest_hex +  0);
+  digest[1] = hex_to_u32 (digest_hex +  8);
+  digest[2] = hex_to_u32 (digest_hex + 16);
+  digest[3] = hex_to_u32 (digest_hex + 24);
+
+  if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
+  {
+    digest[0] -= MD5M_A; // module_hash_encode adds these constants back
+    digest[1] -= MD5M_B;
+    digest[2] -= MD5M_C;
+    digest[3] -= MD5M_D;
+  }
+
+  const u8 *salt_src     = token.buf[1];
+  const int salt_src_len = token.len[1];
+
+  const bool salt_ok = generic_salt_decode (hashconfig, salt_src, salt_src_len, (u8 *) salt->salt_buf, (int *) &salt->salt_len);
+
+  if (salt_ok == false) return (PARSER_SALT_LENGTH);
+
+  return (PARSER_OK);
+}
+
+int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const void *digest_buf, MAYBE_UNUSED const salt_t *salt, MAYBE_UNUSED const void *esalt_buf, MAYBE_UNUSED const void *hook_salt_buf, MAYBE_UNUSED const hashinfo_t *hash_info, char *line_buf, MAYBE_UNUSED const int line_size)
+{
+  const u32 *digest = (const u32 *) digest_buf;
+
+  // the caller's digest must stay untouched (changing it would destroy sorting),
+  // so every adjustment below is applied to a copy
+
+  u32 tmp[4];
+
+  tmp[0] = digest[0];
+  tmp[1] = digest[1];
+  tmp[2] = digest[2];
+  tmp[3] = digest[3];
+
+  if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
+  {
+    tmp[0] += MD5M_A; // module_hash_decode subtracts these constants
+    tmp[1] += MD5M_B;
+    tmp[2] += MD5M_C;
+    tmp[3] += MD5M_D;
+  }
+
+  u8 *out_buf = (u8 *) line_buf;
+
+  int out_len = 0;
+
+  u32_to_hex (tmp[0], out_buf + out_len); out_len += 8;
+  u32_to_hex (tmp[1], out_buf + out_len); out_len += 8;
+  u32_to_hex (tmp[2], out_buf + out_len); out_len += 8;
+  u32_to_hex (tmp[3], out_buf + out_len); out_len += 8;
+
+  out_buf[out_len] = hashconfig->separator;
+
+  out_len += 1;
+
+  out_len += generic_salt_encode (hashconfig, (const u8 *) salt->salt_buf, (const int) salt->salt_len, out_buf + out_len);
+
+  return out_len;
+}
+
+void module_init (module_ctx_t *module_ctx)
+{
+  module_ctx->module_context_size            = MODULE_CONTEXT_SIZE_CURRENT;
+  module_ctx->module_interface_version       = MODULE_INTERFACE_VERSION_CURRENT;
+
+  module_ctx->module_attack_exec             = module_attack_exec;
+  module_ctx->module_benchmark_esalt         = MODULE_DEFAULT;
+  module_ctx->module_benchmark_hook_salt     = MODULE_DEFAULT;
+  module_ctx->module_benchmark_mask          = MODULE_DEFAULT;
+  module_ctx->module_benchmark_charset       = MODULE_DEFAULT;
+  module_ctx->module_benchmark_salt          = MODULE_DEFAULT;
+  module_ctx->module_build_plain_postprocess = MODULE_DEFAULT;
+  module_ctx->module_deep_comp_kernel        = MODULE_DEFAULT;
+  module_ctx->module_deprecated_notice       = MODULE_DEFAULT;
+  module_ctx->module_dgst_pos0               = module_dgst_pos0;
+  module_ctx->module_dgst_pos1               = module_dgst_pos1;
+  module_ctx->module_dgst_pos2               = module_dgst_pos2;
+  module_ctx->module_dgst_pos3               = module_dgst_pos3;
+  module_ctx->module_dgst_size               = module_dgst_size;
+  module_ctx->module_dictstat_disable        = MODULE_DEFAULT;
+  module_ctx->module_esalt_size              = MODULE_DEFAULT;
+  module_ctx->module_extra_buffer_size       = MODULE_DEFAULT;
+  module_ctx->module_extra_tmp_size          = MODULE_DEFAULT;
+  module_ctx->module_extra_tuningdb_block    = MODULE_DEFAULT;
+  module_ctx->module_forced_outfile_format   = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_count       = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_parse       = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_save        = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_postprocess = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_potfile     = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_zero_hash   = MODULE_DEFAULT;
+  module_ctx->module_hash_decode             = module_hash_decode;
+  module_ctx->module_hash_encode_status      = MODULE_DEFAULT;
+  module_ctx->module_hash_encode_potfile     = MODULE_DEFAULT;
+  module_ctx->module_hash_encode             = module_hash_encode;
+  module_ctx->module_hash_init_selftest      = MODULE_DEFAULT;
+  module_ctx->module_hash_mode               = MODULE_DEFAULT;
+  module_ctx->module_hash_category           = module_hash_category;
+  module_ctx->module_hash_name               = module_hash_name;
+  module_ctx->module_hashes_count_min        = MODULE_DEFAULT;
+  module_ctx->module_hashes_count_max        = MODULE_DEFAULT;
+  module_ctx->module_hlfmt_disable           = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_size   = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_init   = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_term   = MODULE_DEFAULT;
+  module_ctx->module_hook12                  = MODULE_DEFAULT;
+  module_ctx->module_hook23                  = MODULE_DEFAULT;
+  module_ctx->module_hook_salt_size          = MODULE_DEFAULT;
+  module_ctx->module_hook_size               = MODULE_DEFAULT;
+  module_ctx->module_jit_build_options       = MODULE_DEFAULT;
+  module_ctx->module_jit_cache_disable       = MODULE_DEFAULT;
+  module_ctx->module_kernel_accel_max        = MODULE_DEFAULT;
+  module_ctx->module_kernel_accel_min        = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_max        = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min        = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max      = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_min      = MODULE_DEFAULT;
+  module_ctx->module_kern_type               = module_kern_type;
+  module_ctx->module_kern_type_dynamic       = MODULE_DEFAULT;
+  module_ctx->module_opti_type               = module_opti_type;
+  module_ctx->module_opts_type               = module_opts_type;
+  module_ctx->module_outfile_check_disable   = MODULE_DEFAULT;
+  module_ctx->module_outfile_check_nocomp    = MODULE_DEFAULT;
+  module_ctx->module_potfile_custom_check    = MODULE_DEFAULT;
+  module_ctx->module_potfile_disable         = MODULE_DEFAULT;
+  module_ctx->module_potfile_keep_all_hashes = MODULE_DEFAULT;
+  module_ctx->module_pwdump_column           = MODULE_DEFAULT;
+  module_ctx->module_pw_max                  = MODULE_DEFAULT;
+  module_ctx->module_pw_min                  = MODULE_DEFAULT;
+  module_ctx->module_salt_max                = MODULE_DEFAULT;
+  module_ctx->module_salt_min                = MODULE_DEFAULT;
+  module_ctx->module_salt_type               = module_salt_type;
+  module_ctx->module_separator               = MODULE_DEFAULT;
+  module_ctx->module_st_hash                 = module_st_hash;
+  module_ctx->module_st_pass                 = module_st_pass;
+  module_ctx->module_tmp_size                = MODULE_DEFAULT;
+  module_ctx->module_unstable_warning        = MODULE_DEFAULT;
+  module_ctx->module_warmup_disable          = MODULE_DEFAULT;
+}
diff --git a/tools/test_modules/m32710.pm b/tools/test_modules/m32710.pm
new file mode 100644
index 000000000..03d37d07f
--- /dev/null
+++ b/tools/test_modules/m32710.pm
@@ -0,0 +1,44 @@
+#!/usr/bin/env perl
+
+##
+## Author......: Custom Hashcat Test
+## License.....: MIT
+##
+
+use strict;
+use warnings;
+
+use Digest::MD5 qw (md5_hex);
+
+sub module_constraints { return [[0, 256], [0, 223], [0, 55], [0, 23], [-1, -1]]; } # NOTE(review): presumably [min, max] length pairs read by the test harness - confirm
+
+sub module_generate_hash
+{
+  my ($word, $salt) = @_;
+
+  # inner digest: lowercase hex md5 of the plain word
+  my $inner = md5_hex ($word);
+  # outer digest: the salt is placed on both sides of the inner hex digest
+  my $digest = md5_hex ($salt . $inner . $salt);
+
+  return sprintf ("%s:%s", $digest, $salt);
+}
+
+sub module_verify_hash
+{
+  my ($line) = @_;
+
+  my ($hash, $salt, $word) = split (':', $line);
+
+  # all three fields (digest, salt, plain) must be present,
+  # otherwise the line is rejected with an empty return
+  return unless defined ($hash) && defined ($salt) && defined ($word);
+
+  my $plain = pack_if_HEX_notation ($word);
+
+  my $expected = module_generate_hash ($plain, $salt);
+
+  return ($expected, $word);
+}
+
+1;

From fcfd7b00bae817cc53b9cb9cb889a5d78c86a431 Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Thu, 24 Apr 2025 21:26:05 +0200
Subject: [PATCH 17/83] Added hash-mode: md5($salt1.$pass.$salt2)

---
 OpenCL/m33000_a0-pure.cl     | 157 ++++++++++++++++++
 OpenCL/m33000_a1-pure.cl     | 147 +++++++++++++++++
 OpenCL/m33000_a3-pure.cl     | 181 +++++++++++++++++++++
 docs/changes.txt             |   1 +
 docs/readme.txt              |   1 +
 src/modules/module_33000.c   | 301 +++++++++++++++++++++++++++++++++++
 tools/test_modules/m33000.pm |  46 ++++++
 7 files changed, 834 insertions(+)
 create mode 100644 OpenCL/m33000_a0-pure.cl
 create mode 100644 OpenCL/m33000_a1-pure.cl
 create mode 100644 OpenCL/m33000_a3-pure.cl
 create mode 100644 src/modules/module_33000.c
 create mode 100644 tools/test_modules/m33000.pm

diff --git a/OpenCL/m33000_a0-pure.cl b/OpenCL/m33000_a0-pure.cl
new file mode 100644
index 000000000..1c4cf0ebf
--- /dev/null
+++ b/OpenCL/m33000_a0-pure.cl
@@ -0,0 +1,157 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+//#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include M2S(INCLUDE_PATH/inc_vendor.h)
+#include M2S(INCLUDE_PATH/inc_types.h)
+#include M2S(INCLUDE_PATH/inc_platform.cl)
+#include M2S(INCLUDE_PATH/inc_common.cl)
+#include M2S(INCLUDE_PATH/inc_rp.h)
+#include M2S(INCLUDE_PATH/inc_rp.cl)
+#include M2S(INCLUDE_PATH/inc_scalar.cl)
+#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
+#endif
+
+typedef struct md5_double_salt // the two salts consumed by the m33000 kernels below
+{
+  u32 salt1_buf[64]; // fed to md5 before the password
+  int salt1_len; // length argument passed to md5_update_global together with salt1_buf
+
+  u32 salt2_buf[64]; // fed to md5 after the password
+  int salt2_len; // length in bytes (the kernels copy one u32 per 4 bytes)
+
+} md5_double_salt_t;
+
+KERNEL_FQ void m33000_mxx (KERN_ATTR_RULES_ESALT (md5_double_salt_t)) // md5 ($salt1 . rule ($pass) . $salt2), result handed to COMPARE_M_SCALAR
+{
+  /**
+   * modifier: work-item indices
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * base: copy salt2 and pre-hash salt1
+   */
+
+  COPY_PW (pws[gid]);
+
+  const int salt2_len = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_len;
+
+  u32 s2[64] = { 0 };
+
+  for (int i = 0, idx = 0; i < salt2_len; i += 4, idx += 1) // one 32-bit word per 4 salt bytes
+  {
+    s2[idx] = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_buf[idx];
+  }
+
+  md5_ctx_t ctx0; // md5 state after salt1; copied for every rule in the loop below
+
+  md5_init (&ctx0);
+
+  md5_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_buf, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_len);
+
+  /**
+   * loop: one rule per pass
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  {
+    pw_t tmp = PASTE_PW;
+
+    tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
+
+    md5_ctx_t ctx = ctx0; // resume from the salt1 state instead of hashing salt1 again
+
+    md5_update (&ctx, tmp.i, tmp.pw_len);
+
+    md5_update (&ctx, s2, salt2_len);
+
+    md5_final (&ctx);
+
+    const u32 r0 = ctx.h[DGST_R0];
+    const u32 r1 = ctx.h[DGST_R1];
+    const u32 r2 = ctx.h[DGST_R2];
+    const u32 r3 = ctx.h[DGST_R3];
+
+    COMPARE_M_SCALAR (r0, r1, r2, r3);
+  }
+}
+
+KERNEL_FQ void m33000_sxx (KERN_ATTR_RULES_ESALT (md5_double_salt_t)) // md5 ($salt1 . rule ($pass) . $salt2), result handed to COMPARE_S_SCALAR
+{
+  /**
+   * modifier: work-item indices
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * digest: the four target words at DIGESTS_OFFSET_HOST (not referenced again in this body; presumably read by COMPARE_S_SCALAR)
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
+  };
+
+  /**
+   * base: copy salt2 and pre-hash salt1
+   */
+
+  COPY_PW (pws[gid]);
+
+  const int salt2_len = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_len;
+
+  u32 s2[64] = { 0 };
+
+  for (int i = 0, idx = 0; i < salt2_len; i += 4, idx += 1) // one 32-bit word per 4 salt bytes
+  {
+    s2[idx] = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_buf[idx];
+  }
+
+  md5_ctx_t ctx0; // md5 state after salt1; copied for every rule in the loop below
+
+  md5_init (&ctx0);
+
+  md5_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_buf, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_len);
+
+  /**
+   * loop: one rule per pass
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  {
+    pw_t tmp = PASTE_PW;
+
+    tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
+
+    md5_ctx_t ctx = ctx0; // resume from the salt1 state instead of hashing salt1 again
+
+    md5_update (&ctx, tmp.i, tmp.pw_len);
+
+    md5_update (&ctx, s2, salt2_len);
+
+    md5_final (&ctx);
+
+    const u32 r0 = ctx.h[DGST_R0];
+    const u32 r1 = ctx.h[DGST_R1];
+    const u32 r2 = ctx.h[DGST_R2];
+    const u32 r3 = ctx.h[DGST_R3];
+
+    COMPARE_S_SCALAR (r0, r1, r2, r3);
+  }
+}
diff --git a/OpenCL/m33000_a1-pure.cl b/OpenCL/m33000_a1-pure.cl
new file mode 100644
index 000000000..59a07b57c
--- /dev/null
+++ b/OpenCL/m33000_a1-pure.cl
@@ -0,0 +1,147 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+//#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include M2S(INCLUDE_PATH/inc_vendor.h)
+#include M2S(INCLUDE_PATH/inc_types.h)
+#include M2S(INCLUDE_PATH/inc_platform.cl)
+#include M2S(INCLUDE_PATH/inc_common.cl)
+#include M2S(INCLUDE_PATH/inc_scalar.cl)
+#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
+#endif
+
+typedef struct md5_double_salt // the two salts consumed by the m33000 kernels below
+{
+  u32 salt1_buf[64]; // fed to md5 before the password
+  int salt1_len; // length argument passed to md5_update_global together with salt1_buf
+
+  u32 salt2_buf[64]; // fed to md5 after the password
+  int salt2_len; // length in bytes (the kernels copy one u32 per 4 bytes)
+
+} md5_double_salt_t;
+
+KERNEL_FQ void m33000_mxx (KERN_ATTR_ESALT (md5_double_salt_t)) // md5 ($salt1 . left . right . $salt2), result handed to COMPARE_M_SCALAR
+{
+  /**
+   * modifier: work-item indices
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * base: copy salt2, pre-hash salt1 and the pws[gid] word
+   */
+
+  const int salt2_len = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_len;
+
+  u32 s2[64] = { 0 };
+
+  for (int i = 0, idx = 0; i < salt2_len; i += 4, idx += 1) // one 32-bit word per 4 salt bytes
+  {
+    s2[idx] = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_buf[idx];
+  }
+
+  md5_ctx_t ctx0; // md5 state after salt1 and pws[gid]; copied for every combs_buf entry below
+
+  md5_init (&ctx0);
+
+  md5_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_buf, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_len);
+
+  md5_update_global (&ctx0, pws[gid].i, pws[gid].pw_len);
+
+  /**
+   * loop: one combs_buf entry per pass
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  {
+    md5_ctx_t ctx = ctx0; // resume from the shared prefix state
+
+    md5_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
+
+    md5_update (&ctx, s2, salt2_len);
+
+    md5_final (&ctx);
+
+    const u32 r0 = ctx.h[DGST_R0];
+    const u32 r1 = ctx.h[DGST_R1];
+    const u32 r2 = ctx.h[DGST_R2];
+    const u32 r3 = ctx.h[DGST_R3];
+
+    COMPARE_M_SCALAR (r0, r1, r2, r3);
+  }
+}
+
+KERNEL_FQ void m33000_sxx (KERN_ATTR_ESALT (md5_double_salt_t)) // md5 ($salt1 . left . right . $salt2), result handed to COMPARE_S_SCALAR
+{
+  /**
+   * modifier: work-item indices
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * digest: the four target words at DIGESTS_OFFSET_HOST (not referenced again in this body; presumably read by COMPARE_S_SCALAR)
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
+  };
+
+  /**
+   * base: copy salt2, pre-hash salt1 and the pws[gid] word
+   */
+
+  const int salt2_len = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_len;
+
+  u32 s2[64] = { 0 };
+
+  for (int i = 0, idx = 0; i < salt2_len; i += 4, idx += 1) // one 32-bit word per 4 salt bytes
+  {
+    s2[idx] = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_buf[idx];
+  }
+
+  md5_ctx_t ctx0; // md5 state after salt1 and pws[gid]; copied for every combs_buf entry below
+
+  md5_init (&ctx0);
+
+  md5_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_buf, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_len);
+
+  md5_update_global (&ctx0, pws[gid].i, pws[gid].pw_len);
+
+  /**
+   * loop: one combs_buf entry per pass
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  {
+    md5_ctx_t ctx = ctx0; // resume from the shared prefix state
+
+    md5_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
+
+    md5_update (&ctx, s2, salt2_len);
+
+    md5_final (&ctx);
+
+    const u32 r0 = ctx.h[DGST_R0];
+    const u32 r1 = ctx.h[DGST_R1];
+    const u32 r2 = ctx.h[DGST_R2];
+    const u32 r3 = ctx.h[DGST_R3];
+
+    COMPARE_S_SCALAR (r0, r1, r2, r3);
+  }
+}
diff --git a/OpenCL/m33000_a3-pure.cl b/OpenCL/m33000_a3-pure.cl
new file mode 100644
index 000000000..eee052eff
--- /dev/null
+++ b/OpenCL/m33000_a3-pure.cl
@@ -0,0 +1,181 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include M2S(INCLUDE_PATH/inc_vendor.h)
+#include M2S(INCLUDE_PATH/inc_types.h)
+#include M2S(INCLUDE_PATH/inc_platform.cl)
+#include M2S(INCLUDE_PATH/inc_common.cl)
+#include M2S(INCLUDE_PATH/inc_simd.cl)
+#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
+#endif
+
+typedef struct md5_double_salt // the two salts consumed by the m33000 kernels below
+{
+  u32 salt1_buf[64]; // fed to md5 before the password
+  int salt1_len; // length argument passed to md5_update_global together with salt1_buf
+
+  u32 salt2_buf[64]; // fed to md5 after the password
+  int salt2_len; // length in bytes (the kernels copy one u32 per 4 bytes)
+
+} md5_double_salt_t;
+
+KERNEL_FQ void m33000_mxx (KERN_ATTR_VECTOR_ESALT (md5_double_salt_t)) // md5 ($salt1 . $pass . $salt2), result handed to COMPARE_M_SIMD
+{
+  /**
+   * modifier: work-item indices
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * base: copy password and salt2 into zero-initialized word arrays, pre-hash salt1
+   */
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  u32x w[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) // one 32-bit word per 4 password bytes
+  {
+    w[idx] = pws[gid].i[idx];
+  }
+
+  const int salt2_len = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_len;
+
+  u32x s2[64] = { 0 };
+
+  for (int i = 0, idx = 0; i < salt2_len; i += 4, idx += 1) // one 32-bit word per 4 salt bytes
+  {
+    s2[idx] = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_buf[idx];
+  }
+
+  md5_ctx_t ctx0; // scalar md5 state after salt1; every pass of the loop starts from it
+
+  md5_init (&ctx0);
+
+  md5_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_buf, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_len);
+
+  /**
+   * loop: il_pos advances by VECT_SIZE per pass
+   */
+
+  u32x w0l = w[0];
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+  {
+    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+
+    const u32x w0 = w0l | w0r; // OR the per-pass value from words_buf_r into the first password word
+
+    w[0] = w0;
+
+    md5_ctx_vector_t ctx;
+
+    md5_init_vector_from_scalar (&ctx, &ctx0); // start from the salt1 state instead of hashing salt1 again
+
+    md5_update_vector (&ctx, w, pw_len);
+
+    md5_update_vector (&ctx, s2, salt2_len);
+
+    md5_final_vector (&ctx);
+
+    const u32x r0 = ctx.h[DGST_R0];
+    const u32x r1 = ctx.h[DGST_R1];
+    const u32x r2 = ctx.h[DGST_R2];
+    const u32x r3 = ctx.h[DGST_R3];
+
+    COMPARE_M_SIMD (r0, r1, r2, r3);
+  }
+}
+
+KERNEL_FQ void m33000_sxx (KERN_ATTR_VECTOR_ESALT (md5_double_salt_t)) // md5 ($salt1 . $pass . $salt2), result handed to COMPARE_S_SIMD
+{
+  /**
+   * modifier: work-item indices
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * digest: the four target words at DIGESTS_OFFSET_HOST (not referenced again in this body; presumably read by COMPARE_S_SIMD)
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
+  };
+
+  /**
+   * base: copy password and salt2 into zero-initialized word arrays, pre-hash salt1
+   */
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  u32x w[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) // one 32-bit word per 4 password bytes
+  {
+    w[idx] = pws[gid].i[idx];
+  }
+
+  const int salt2_len = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_len;
+
+  u32x s2[64] = { 0 };
+
+  for (int i = 0, idx = 0; i < salt2_len; i += 4, idx += 1) // one 32-bit word per 4 salt bytes
+  {
+    s2[idx] = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_buf[idx];
+  }
+
+  md5_ctx_t ctx0; // scalar md5 state after salt1; every pass of the loop starts from it
+
+  md5_init (&ctx0);
+
+  md5_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_buf, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_len);
+
+  /**
+   * loop: il_pos advances by VECT_SIZE per pass
+   */
+
+  u32x w0l = w[0];
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+  {
+    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+
+    const u32x w0 = w0l | w0r; // OR the per-pass value from words_buf_r into the first password word
+
+    w[0] = w0;
+
+    md5_ctx_vector_t ctx;
+
+    md5_init_vector_from_scalar (&ctx, &ctx0); // start from the salt1 state instead of hashing salt1 again
+
+    md5_update_vector (&ctx, w, pw_len);
+
+    md5_update_vector (&ctx, s2, salt2_len);
+
+    md5_final_vector (&ctx);
+
+    const u32x r0 = ctx.h[DGST_R0];
+    const u32x r1 = ctx.h[DGST_R1];
+    const u32x r2 = ctx.h[DGST_R2];
+    const u32x r3 = ctx.h[DGST_R3];
+
+    COMPARE_S_SIMD (r0, r1, r2, r3);
+  }
+}
diff --git a/docs/changes.txt b/docs/changes.txt
index 283e3c0d4..4dae255b3 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -44,6 +44,7 @@
 - Added hash-mode: bcrypt(sha256($pass))
 - Added hash-mode: HMAC-RIPEMD160 (key = $pass)
 - Added hash-mode: HMAC-RIPEMD160 (key = $salt)
+- Added hash-mode: md5($salt1.$pass.$salt2)
 - Added hash-mode: md5($salt1.sha1($salt2.$pass))
 - Added hash-mode: md5(md5($pass.$salt))
 - Added hash-mode: md5(md5($salt).md5(md5($pass)))
diff --git a/docs/readme.txt b/docs/readme.txt
index e5fd69f25..b00f21628 100644
--- a/docs/readme.txt
+++ b/docs/readme.txt
@@ -86,6 +86,7 @@ NVIDIA GPUs require "NVIDIA Driver" (440.64 or later) and "CUDA Toolkit" (9.0 or
 - md5($salt.md5($salt.$pass))
 - md5($salt.sha1($salt.$pass))
 - md5($salt.utf16le($pass))
+- md5($salt1.$pass.$salt2)
 - md5($salt1.sha1($salt2.$pass))
 - md5($salt1.strtoupper(md5($salt2.$pass)))
 - md5(md5($pass))
diff --git a/src/modules/module_33000.c b/src/modules/module_33000.c
new file mode 100644
index 000000000..67a040e17
--- /dev/null
+++ b/src/modules/module_33000.c
@@ -0,0 +1,301 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#include "common.h"
+#include "types.h"
+#include "modules.h"
+#include "bitops.h"
+#include "convert.h"
+#include "shared.h"
+#include "emu_inc_hash_md5.h"
+
+static const u32   ATTACK_EXEC    = ATTACK_EXEC_INSIDE_KERNEL;
+static const u32   DGST_POS0      = 0;
+static const u32   DGST_POS1      = 3;
+static const u32   DGST_POS2      = 2;
+static const u32   DGST_POS3      = 1;
+static const u32   DGST_SIZE      = DGST_SIZE_4_4;
+static const u32   HASH_CATEGORY  = HASH_CATEGORY_RAW_HASH_SALTED;
+static const char *HASH_NAME      = "md5($salt1.$pass.$salt2)";
+static const u64   KERN_TYPE      = 33000; // NOTE(review): presumably selects the m33000_* kernels - confirm
+static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_PRECOMPUTE_INIT
+                                  | OPTI_TYPE_RAW_HASH
+                                  | OPTI_TYPE_EARLY_SKIP;
+static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
+                                  | OPTS_TYPE_PT_GENERATE_LE
+                                  | OPTS_TYPE_PT_ADD80
+                                  | OPTS_TYPE_PT_ADDBITS14
+                                  | OPTS_TYPE_MAXIMUM_THREADS;
+static const u32   SALT_TYPE      = SALT_TYPE_GENERIC;
+static const char *ST_PASS        = "hashcat"; // NOTE(review): presumably the plaintext that ST_HASH below resolves to - confirm
+static const char *ST_HASH        = "036a81bc84e01700faf965c3caaa3954:0243402616975530019305541949338903179746132451440267505028190519468680111713847350899833009965414425621884797638402856957040435715380438220464016:0757380776148401126145133134435506200715895167468508855794708942913462135276430452032928239699197100625556660484150983610760766285767453357925167463064045123083116191440783332986105343359475417787249790516137833723344398087127577224833364437305770807742238"; // field order: digest:salt1:salt2, the same three tokens module_hash_decode parses
+
+u32         module_attack_exec    (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC;     } // each accessor in this group ignores its arguments and returns the file-level constant of the same name
+u32         module_dgst_pos0      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0;       }
+u32         module_dgst_pos1      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS1;       }
+u32         module_dgst_pos2      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS2;       }
+u32         module_dgst_pos3      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS3;       }
+u32         module_dgst_size      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_SIZE;       }
+u32         module_hash_category  (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_CATEGORY;   }
+const char *module_hash_name      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_NAME;       }
+u64         module_kern_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return KERN_TYPE;       }
+u32         module_opti_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTI_TYPE;       }
+u64         module_opts_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTS_TYPE;       }
+u32         module_salt_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return SALT_TYPE;       }
+const char *module_st_hash        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH;         }
+const char *module_st_pass        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS;         }
+
+typedef struct md5_double_salt // esalt payload of this module: both salts exactly as module_hash_decode stores them
+{
+  u32 salt1_buf[64]; // first salt, filled through a u8 pointer by generic_salt_decode
+  int salt1_len;     // length reported by generic_salt_decode for salt1
+
+  u32 salt2_buf[64]; // second salt, same storage scheme as salt1_buf
+  int salt2_len;     // length reported by generic_salt_decode for salt2
+
+} md5_double_salt_t;
+
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 256; // fixed value; none of the arguments influence it
+
+  return kernel_threads_max;
+}
+
+char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
+{
+  char *jit_build_options = NULL; // stays NULL unless the vendor check below matches
+
+  if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
+  {
+    // this is a workaround to avoid a compile time of over an hour (and then to not work) on ROCM in pure kernel mode
+
+    hc_asprintf (&jit_build_options, "-D NO_INLINE"); // NOTE(review): presumably allocates the string, leaving the caller to free it - confirm
+  }
+
+  return jit_build_options;
+}
+
+u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u64 esalt_size = (const u64) sizeof (md5_double_salt_t); // size in bytes of the struct that carries both salts
+
+  return esalt_size;
+}
+
+int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
+{
+  u32 *digest = (u32 *) digest_buf; // receives the four parsed digest words
+
+  md5_double_salt_t *md5_double_salt = (md5_double_salt_t *) esalt_buf; // receives both decoded salts
+
+  hc_token_t token;
+
+  memset (&token, 0, sizeof (hc_token_t));
+
+  token.token_cnt  = 3; // digest, salt1, salt2
+
+  token.sep[0]     = hashconfig->separator;
+  token.len[0]     = 32; // digest field: exactly 32 characters, verified as hex
+  token.attr[0]    = TOKEN_ATTR_FIXED_LENGTH
+                   | TOKEN_ATTR_VERIFY_HEX;
+
+  token.sep[1]     = hashconfig->separator;
+  token.len_min[1] = SALT_MIN;
+  token.len_max[1] = SALT_MAX;
+  token.attr[1]    = TOKEN_ATTR_VERIFY_LENGTH;
+
+  token.sep[2]     = hashconfig->separator;
+  token.len_min[2] = SALT_MIN;
+  token.len_max[2] = SALT_MAX;
+  token.attr[2]    = TOKEN_ATTR_VERIFY_LENGTH;
+
+  if (hashconfig->opts_type & OPTS_TYPE_ST_HEX) // hex salt option: double both salt length limits and require hex characters
+  {
+    token.len_min[1] *= 2;
+    token.len_max[1] *= 2;
+
+    token.attr[1] |= TOKEN_ATTR_VERIFY_HEX;
+
+    token.len_min[2] *= 2;
+    token.len_max[2] *= 2;
+
+    token.attr[2] |= TOKEN_ATTR_VERIFY_HEX;
+  }
+
+  const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token);
+
+  if (rc_tokenizer != PARSER_OK) return (rc_tokenizer); // tokenizer error codes are passed straight to the caller
+
+  const u8 *hash_pos = token.buf[0];
+
+  digest[0] = hex_to_u32 (hash_pos +  0); // 8 hex characters per digest word
+  digest[1] = hex_to_u32 (hash_pos +  8);
+  digest[2] = hex_to_u32 (hash_pos + 16);
+  digest[3] = hex_to_u32 (hash_pos + 24);
+
+  if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) // optimized-kernel mode only: subtract MD5M_A..MD5M_D; module_hash_encode adds them back
+  {
+    digest[0] -= MD5M_A;
+    digest[1] -= MD5M_B;
+    digest[2] -= MD5M_C;
+    digest[3] -= MD5M_D;
+  }
+
+  const bool parse_rc1 = generic_salt_decode (hashconfig, token.buf[1], token.len[1], (u8 *) md5_double_salt->salt1_buf, &md5_double_salt->salt1_len);
+
+  if (parse_rc1 == false) return (PARSER_SALT_LENGTH);
+
+  const bool parse_rc2 = generic_salt_decode (hashconfig, token.buf[2], token.len[2], (u8 *) md5_double_salt->salt2_buf, &md5_double_salt->salt2_len);
+
+  if (parse_rc2 == false) return (PARSER_SALT_LENGTH);
+
+  // make salt sorter happy: salt_buf gets MD5 over salt1 followed by salt2, while the raw salts stay in the esalt
+
+  md5_ctx_t md5_ctx;
+
+  md5_init   (&md5_ctx);
+  md5_update (&md5_ctx, md5_double_salt->salt1_buf, md5_double_salt->salt1_len);
+  md5_update (&md5_ctx, md5_double_salt->salt2_buf, md5_double_salt->salt2_len);
+  md5_final  (&md5_ctx);
+
+  salt->salt_buf[0] = md5_ctx.h[0];
+  salt->salt_buf[1] = md5_ctx.h[1];
+  salt->salt_buf[2] = md5_ctx.h[2];
+  salt->salt_buf[3] = md5_ctx.h[3];
+
+  salt->salt_len = 16; // the four u32 words stored just above
+
+  return (PARSER_OK);
+}
+
+int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const void *digest_buf, MAYBE_UNUSED const salt_t *salt, MAYBE_UNUSED const void *esalt_buf, MAYBE_UNUSED const void *hook_salt_buf, MAYBE_UNUSED const hashinfo_t *hash_info, char *line_buf, MAYBE_UNUSED const int line_size)
+{
+  const u32 *digest = (const u32 *) digest_buf;
+
+  const md5_double_salt_t *md5_double_salt = (const md5_double_salt_t *) esalt_buf; // salts come from the esalt; the salt argument is not read here
+
+  // we can not change anything in the original buffer, otherwise destroying sorting
+  // therefore create some local buffer
+
+  u32 tmp[4];
+
+  tmp[0] = digest[0];
+  tmp[1] = digest[1];
+  tmp[2] = digest[2];
+  tmp[3] = digest[3];
+
+  if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) // mirror of the subtraction in module_hash_decode
+  {
+    tmp[0] += MD5M_A;
+    tmp[1] += MD5M_B;
+    tmp[2] += MD5M_C;
+    tmp[3] += MD5M_D;
+  }
+
+  u8 *out_buf = (u8 *) line_buf;
+
+  int out_len = 0; // running write offset into out_buf; also the return value
+
+  u32_to_hex (tmp[0], out_buf + out_len); out_len += 8; // 8 hex characters per digest word
+  u32_to_hex (tmp[1], out_buf + out_len); out_len += 8;
+  u32_to_hex (tmp[2], out_buf + out_len); out_len += 8;
+  u32_to_hex (tmp[3], out_buf + out_len); out_len += 8;
+
+  out_buf[out_len] = hashconfig->separator; // separator between digest and salt1
+
+  out_len += 1;
+
+  out_len += generic_salt_encode (hashconfig, (const u8 *) md5_double_salt->salt1_buf, md5_double_salt->salt1_len, out_buf + out_len);
+
+  out_buf[out_len] = hashconfig->separator; // separator between salt1 and salt2
+
+  out_len += 1;
+
+  out_len += generic_salt_encode (hashconfig, (const u8 *) md5_double_salt->salt2_buf, md5_double_salt->salt2_len, out_buf + out_len);
+
+  return out_len; // NOTE(review): line_size is never checked in this function - confirm the caller guarantees enough room
+}
+
+void module_init (module_ctx_t *module_ctx)
+{
+  module_ctx->module_context_size             = MODULE_CONTEXT_SIZE_CURRENT; // the rest of this function fills every hook slot: either a function from this file or MODULE_DEFAULT
+  module_ctx->module_interface_version        = MODULE_INTERFACE_VERSION_CURRENT;
+
+  module_ctx->module_attack_exec              = module_attack_exec;
+  module_ctx->module_benchmark_esalt          = MODULE_DEFAULT;
+  module_ctx->module_benchmark_hook_salt      = MODULE_DEFAULT;
+  module_ctx->module_benchmark_mask           = MODULE_DEFAULT;
+  module_ctx->module_benchmark_charset        = MODULE_DEFAULT;
+  module_ctx->module_benchmark_salt           = MODULE_DEFAULT;
+  module_ctx->module_build_plain_postprocess  = MODULE_DEFAULT;
+  module_ctx->module_deep_comp_kernel         = MODULE_DEFAULT;
+  module_ctx->module_deprecated_notice        = MODULE_DEFAULT;
+  module_ctx->module_dgst_pos0                = module_dgst_pos0;
+  module_ctx->module_dgst_pos1                = module_dgst_pos1;
+  module_ctx->module_dgst_pos2                = module_dgst_pos2;
+  module_ctx->module_dgst_pos3                = module_dgst_pos3;
+  module_ctx->module_dgst_size                = module_dgst_size;
+  module_ctx->module_dictstat_disable         = MODULE_DEFAULT;
+  module_ctx->module_esalt_size               = module_esalt_size; // reports sizeof (md5_double_salt_t)
+  module_ctx->module_extra_buffer_size        = MODULE_DEFAULT;
+  module_ctx->module_extra_tmp_size           = MODULE_DEFAULT;
+  module_ctx->module_extra_tuningdb_block     = MODULE_DEFAULT;
+  module_ctx->module_forced_outfile_format    = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_count        = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_parse        = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_save         = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_postprocess  = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_potfile      = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_zero_hash    = MODULE_DEFAULT;
+  module_ctx->module_hash_decode              = module_hash_decode;
+  module_ctx->module_hash_encode_status       = MODULE_DEFAULT;
+  module_ctx->module_hash_encode_potfile      = MODULE_DEFAULT;
+  module_ctx->module_hash_encode              = module_hash_encode;
+  module_ctx->module_hash_init_selftest       = MODULE_DEFAULT;
+  module_ctx->module_hash_mode                = MODULE_DEFAULT;
+  module_ctx->module_hash_category            = module_hash_category;
+  module_ctx->module_hash_name                = module_hash_name;
+  module_ctx->module_hashes_count_min         = MODULE_DEFAULT;
+  module_ctx->module_hashes_count_max         = MODULE_DEFAULT;
+  module_ctx->module_hlfmt_disable            = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_size    = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_init    = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_term    = MODULE_DEFAULT;
+  module_ctx->module_hook12                   = MODULE_DEFAULT;
+  module_ctx->module_hook23                   = MODULE_DEFAULT;
+  module_ctx->module_hook_salt_size           = MODULE_DEFAULT;
+  module_ctx->module_hook_size                = MODULE_DEFAULT;
+  module_ctx->module_jit_build_options        = module_jit_build_options; // adds "-D NO_INLINE" for VENDOR_ID_AMD_USE_HIP devices
+  module_ctx->module_jit_cache_disable        = MODULE_DEFAULT;
+  module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
+  module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max; // returns 256
+  module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
+  module_ctx->module_kern_type                = module_kern_type;
+  module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
+  module_ctx->module_opti_type                = module_opti_type;
+  module_ctx->module_opts_type                = module_opts_type;
+  module_ctx->module_outfile_check_disable    = MODULE_DEFAULT;
+  module_ctx->module_outfile_check_nocomp     = MODULE_DEFAULT;
+  module_ctx->module_potfile_custom_check     = MODULE_DEFAULT;
+  module_ctx->module_potfile_disable          = MODULE_DEFAULT;
+  module_ctx->module_potfile_keep_all_hashes  = MODULE_DEFAULT;
+  module_ctx->module_pwdump_column            = MODULE_DEFAULT;
+  module_ctx->module_pw_max                   = MODULE_DEFAULT;
+  module_ctx->module_pw_min                   = MODULE_DEFAULT;
+  module_ctx->module_salt_max                 = MODULE_DEFAULT;
+  module_ctx->module_salt_min                 = MODULE_DEFAULT;
+  module_ctx->module_salt_type                = module_salt_type;
+  module_ctx->module_separator                = MODULE_DEFAULT;
+  module_ctx->module_st_hash                  = module_st_hash;
+  module_ctx->module_st_pass                  = module_st_pass;
+  module_ctx->module_tmp_size                 = MODULE_DEFAULT;
+  module_ctx->module_unstable_warning         = MODULE_DEFAULT;
+  module_ctx->module_warmup_disable           = MODULE_DEFAULT;
+}
diff --git a/tools/test_modules/m33000.pm b/tools/test_modules/m33000.pm
new file mode 100644
index 000000000..93498c0c5
--- /dev/null
+++ b/tools/test_modules/m33000.pm
@@ -0,0 +1,46 @@
+#!/usr/bin/env perl
+
+##
+## Author......: See docs/credits.txt
+## License.....: MIT
+##
+
+use strict;
+use warnings;
+
+use Digest::MD5 qw (md5_hex);
+
+sub module_constraints { [[0, 256], [0, 256], [-1, -1], [-1, -1], [-1, -1]] } # NOTE(review): presumably [min, max] length pairs (word first, then salt) with -1 meaning unused - confirm against the test driver
+
+sub module_generate_hash # builds "digest:salt1:salt2" where digest = md5_hex (salt1 . word . salt2)
+{
+  my $word  = shift; # candidate password, hashed between the two salts
+  my $salt1 = shift; # salt placed before the password
+  my $salt2 = shift // random_numeric_string (256); # defined-or: only a missing salt2 is generated; "0" and "" passed in by module_verify_hash are kept as-is
+
+  my $digest = md5_hex ($salt1 . $word . $salt2);
+
+  my $hash = sprintf ("%s:%s:%s", $digest, $salt1, $salt2); # same field order that module_verify_hash splits on
+
+  return $hash;
+}
+
+sub module_verify_hash # re-derives the hash for a "hash:salt1:salt2:word" line and returns it together with the word
+{
+  my $line = shift;
+
+  my ($hash, $salt1, $salt2, $word) = split (':', $line); # NOTE(review): a ':' inside a salt or the word would shift these fields - confirm inputs never contain one
+
+  return unless defined $hash;
+  return unless defined $salt1;
+  return unless defined $salt2;
+  return unless defined $word;
+
+  my $word_packed = pack_if_HEX_notation ($word); # helper defined outside this file; by its name it unpacks $HEX[...] style words - confirm
+
+  my $new_hash = module_generate_hash ($word_packed, $salt1, $salt2);
+
+  return ($new_hash, $word); # the original (unpacked) word is returned, not $word_packed
+}
+
+1;

From 692db67babb526d0bbe1459a108f1aba135b834c Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Thu, 24 Apr 2025 22:28:10 +0200
Subject: [PATCH 18/83] restore the old usage message

---
 src/usage.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/usage.c b/src/usage.c
index 1bee7c547..7cbf22348 100644
--- a/src/usage.c
+++ b/src/usage.c
@@ -91,7 +91,7 @@ static const char *const USAGE_BIG_PRE_HASHMODES[] =
   "     --progress-only            |      | Return ideal progress step size and time to process  |",
   " -c, --segment-size             | Num  | Sets size in MB to cache from the wordfile to X      | -c 32",
   "     --bitmap-min               | Num  | Sets minimum bits allowed for bitmaps to X           | --bitmap-min=24",
-  "     --bitmap-max               | Num  | Sets maximum bits allowed for bitmaps to X (max: 31) | --bitmap-max=24",
+  "     --bitmap-max               | Num  | Sets maximum bits allowed for bitmaps to X           | --bitmap-max=24",
   "     --cpu-affinity             | Str  | Locks to CPU devices, separated with commas          | --cpu-affinity=1,2,3",
   "     --hook-threads             | Num  | Sets number of threads for a hook (per compute unit) | --hook-threads=8",
   "     --hash-info                |      | Show information for each hash-mode                  |",

From 7f2df87cc1238f5f564e568f8f402858623d1321 Mon Sep 17 00:00:00 2001
From: luke <92046606+dunghm19@users.noreply.github.com>
Date: Fri, 25 Apr 2025 11:28:04 +0700
Subject: [PATCH 19/83] rename 32710 to 33100

---
 OpenCL/{m32710_a0-pure.cl => m33100_a0-pure.cl} | 4 ++--
 OpenCL/{m32710_a1-pure.cl => m33100_a1-pure.cl} | 4 ++--
 OpenCL/{m32710_a3-pure.cl => m33100_a3-pure.cl} | 4 ++--
 src/modules/{module_32710.c => module_33100.c}  | 2 +-
 tools/test_modules/{m32710.pm => m33100.pm}     | 0
 5 files changed, 7 insertions(+), 7 deletions(-)
 rename OpenCL/{m32710_a0-pure.cl => m33100_a0-pure.cl} (98%)
 rename OpenCL/{m32710_a1-pure.cl => m33100_a1-pure.cl} (98%)
 rename OpenCL/{m32710_a3-pure.cl => m33100_a3-pure.cl} (98%)
 rename src/modules/{module_32710.c => module_33100.c} (99%)
 rename tools/test_modules/{m32710.pm => m33100.pm} (100%)

diff --git a/OpenCL/m32710_a0-pure.cl b/OpenCL/m33100_a0-pure.cl
similarity index 98%
rename from OpenCL/m32710_a0-pure.cl
rename to OpenCL/m33100_a0-pure.cl
index 0a9680da4..a8f944ef8 100644
--- a/OpenCL/m32710_a0-pure.cl
+++ b/OpenCL/m33100_a0-pure.cl
@@ -28,7 +28,7 @@
 #define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
-KERNEL_FQ void m32710_mxx (KERN_ATTR_RULES ())
+KERNEL_FQ void m33100_mxx (KERN_ATTR_RULES ())
 {
   /**
    * modifier
@@ -146,7 +146,7 @@ KERNEL_FQ void m32710_mxx (KERN_ATTR_RULES ())
   }
 }
 
-KERNEL_FQ void m32710_sxx (KERN_ATTR_RULES ())
+KERNEL_FQ void m33100_sxx (KERN_ATTR_RULES ())
 {
   /**
    * modifier
diff --git a/OpenCL/m32710_a1-pure.cl b/OpenCL/m33100_a1-pure.cl
similarity index 98%
rename from OpenCL/m32710_a1-pure.cl
rename to OpenCL/m33100_a1-pure.cl
index cb21ffd00..b56745f39 100644
--- a/OpenCL/m32710_a1-pure.cl
+++ b/OpenCL/m33100_a1-pure.cl
@@ -26,7 +26,7 @@
 #define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
-KERNEL_FQ void m32710_mxx (KERN_ATTR_BASIC ())
+KERNEL_FQ void m33100_mxx (KERN_ATTR_BASIC ())
 {
   /**
    * modifier
@@ -142,7 +142,7 @@ KERNEL_FQ void m32710_mxx (KERN_ATTR_BASIC ())
   }
 }
 
-KERNEL_FQ void m32710_sxx (KERN_ATTR_BASIC ())
+KERNEL_FQ void m33100_sxx (KERN_ATTR_BASIC ())
 {
   /**
    * modifier
diff --git a/OpenCL/m32710_a3-pure.cl b/OpenCL/m33100_a3-pure.cl
similarity index 98%
rename from OpenCL/m32710_a3-pure.cl
rename to OpenCL/m33100_a3-pure.cl
index 069ae70f5..9a5af70bb 100644
--- a/OpenCL/m32710_a3-pure.cl
+++ b/OpenCL/m33100_a3-pure.cl
@@ -26,7 +26,7 @@
 #define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
-KERNEL_FQ void m32710_mxx (KERN_ATTR_VECTOR ())
+KERNEL_FQ void m33100_mxx (KERN_ATTR_VECTOR ())
 {
   /**
    * modifier
@@ -155,7 +155,7 @@ KERNEL_FQ void m32710_mxx (KERN_ATTR_VECTOR ())
   }
 }
 
-KERNEL_FQ void m32710_sxx (KERN_ATTR_VECTOR ())
+KERNEL_FQ void m33100_sxx (KERN_ATTR_VECTOR ())
 {
   /**
    * modifier
diff --git a/src/modules/module_32710.c b/src/modules/module_33100.c
similarity index 99%
rename from src/modules/module_32710.c
rename to src/modules/module_33100.c
index 27abdf155..bd6e34163 100644
--- a/src/modules/module_32710.c
+++ b/src/modules/module_33100.c
@@ -18,7 +18,7 @@ static const u32   DGST_POS3 = 1;
 static const u32   DGST_SIZE = DGST_SIZE_4_4;
 static const u32   HASH_CATEGORY = HASH_CATEGORY_RAW_HASH_SALTED;
 static const char* HASH_NAME = "md5($salt.md5($pass).$salt)";
-static const u64   KERN_TYPE = 32710;
+static const u64   KERN_TYPE = 33100;
 static const u32   OPTI_TYPE = OPTI_TYPE_ZERO_BYTE
                               | OPTI_TYPE_PRECOMPUTE_INIT
                               | OPTI_TYPE_EARLY_SKIP;
diff --git a/tools/test_modules/m32710.pm b/tools/test_modules/m33100.pm
similarity index 100%
rename from tools/test_modules/m32710.pm
rename to tools/test_modules/m33100.pm

From 439cb962f09e4186e3d376ef195031bfb4d86233 Mon Sep 17 00:00:00 2001
From: luke <92046606+dunghm19@users.noreply.github.com>
Date: Sat, 26 Apr 2025 06:44:27 +0700
Subject: [PATCH 20/83] apply code style on src/modules/module_33100.c

---
 src/modules/module_33100.c | 268 ++++++++++++++++++-------------------
 1 file changed, 134 insertions(+), 134 deletions(-)

diff --git a/src/modules/module_33100.c b/src/modules/module_33100.c
index bd6e34163..56e88084b 100644
--- a/src/modules/module_33100.c
+++ b/src/modules/module_33100.c
@@ -10,59 +10,59 @@
 #include "convert.h"
 #include "shared.h"
 
-static const u32   ATTACK_EXEC = ATTACK_EXEC_INSIDE_KERNEL;
-static const u32   DGST_POS0 = 0;
-static const u32   DGST_POS1 = 3;
-static const u32   DGST_POS2 = 2;
-static const u32   DGST_POS3 = 1;
-static const u32   DGST_SIZE = DGST_SIZE_4_4;
-static const u32   HASH_CATEGORY = HASH_CATEGORY_RAW_HASH_SALTED;
-static const char* HASH_NAME = "md5($salt.md5($pass).$salt)";
-static const u64   KERN_TYPE = 33100;
-static const u32   OPTI_TYPE = OPTI_TYPE_ZERO_BYTE
-                              | OPTI_TYPE_PRECOMPUTE_INIT
-                              | OPTI_TYPE_EARLY_SKIP;
-static const u64   OPTS_TYPE = OPTS_TYPE_STOCK_MODULE
-                              | OPTS_TYPE_PT_GENERATE_LE
-                              | OPTS_TYPE_PT_ADD80
-                              | OPTS_TYPE_PT_ADDBITS14;
-static const u32   SALT_TYPE = SALT_TYPE_GENERIC;
-static const char* ST_PASS = "hashcat";
-static const char* ST_HASH = "866244ca1d318292a6f40b60e03fd29c:72219426709";
+static const u32   ATTACK_EXEC    = ATTACK_EXEC_INSIDE_KERNEL;
+static const u32   DGST_POS0      = 0;
+static const u32   DGST_POS1      = 3;
+static const u32   DGST_POS2      = 2;
+static const u32   DGST_POS3      = 1;
+static const u32   DGST_SIZE      = DGST_SIZE_4_4;
+static const u32   HASH_CATEGORY  = HASH_CATEGORY_RAW_HASH_SALTED;
+static const char *HASH_NAME      = "md5($salt.md5($pass).$salt)";
+static const u64   KERN_TYPE      = 33100;
+static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_PRECOMPUTE_INIT
+                                  | OPTI_TYPE_EARLY_SKIP;
+static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
+                                  | OPTS_TYPE_PT_GENERATE_LE
+                                  | OPTS_TYPE_PT_ADD80
+                                  | OPTS_TYPE_PT_ADDBITS14;
+static const u32   SALT_TYPE      = SALT_TYPE_GENERIC;
+static const char *ST_PASS        = "hashcat";
+static const char *ST_HASH        = "866244ca1d318292a6f40b60e03fd29c:72219426709";
 
-u32         module_attack_exec(MAYBE_UNUSED const hashconfig_t* hashconfig, MAYBE_UNUSED const user_options_t* user_options, MAYBE_UNUSED const user_options_extra_t* user_options_extra) { return ATTACK_EXEC; }
-u32         module_dgst_pos0(MAYBE_UNUSED const hashconfig_t* hashconfig, MAYBE_UNUSED const user_options_t* user_options, MAYBE_UNUSED const user_options_extra_t* user_options_extra) { return DGST_POS0; }
-u32         module_dgst_pos1(MAYBE_UNUSED const hashconfig_t* hashconfig, MAYBE_UNUSED const user_options_t* user_options, MAYBE_UNUSED const user_options_extra_t* user_options_extra) { return DGST_POS1; }
-u32         module_dgst_pos2(MAYBE_UNUSED const hashconfig_t* hashconfig, MAYBE_UNUSED const user_options_t* user_options, MAYBE_UNUSED const user_options_extra_t* user_options_extra) { return DGST_POS2; }
-u32         module_dgst_pos3(MAYBE_UNUSED const hashconfig_t* hashconfig, MAYBE_UNUSED const user_options_t* user_options, MAYBE_UNUSED const user_options_extra_t* user_options_extra) { return DGST_POS3; }
-u32         module_dgst_size(MAYBE_UNUSED const hashconfig_t* hashconfig, MAYBE_UNUSED const user_options_t* user_options, MAYBE_UNUSED const user_options_extra_t* user_options_extra) { return DGST_SIZE; }
-u32         module_hash_category(MAYBE_UNUSED const hashconfig_t* hashconfig, MAYBE_UNUSED const user_options_t* user_options, MAYBE_UNUSED const user_options_extra_t* user_options_extra) { return HASH_CATEGORY; }
-const char* module_hash_name(MAYBE_UNUSED const hashconfig_t* hashconfig, MAYBE_UNUSED const user_options_t* user_options, MAYBE_UNUSED const user_options_extra_t* user_options_extra) { return HASH_NAME; }
-u64         module_kern_type(MAYBE_UNUSED const hashconfig_t* hashconfig, MAYBE_UNUSED const user_options_t* user_options, MAYBE_UNUSED const user_options_extra_t* user_options_extra) { return KERN_TYPE; }
-u32         module_opti_type(MAYBE_UNUSED const hashconfig_t* hashconfig, MAYBE_UNUSED const user_options_t* user_options, MAYBE_UNUSED const user_options_extra_t* user_options_extra) { return OPTI_TYPE; }
-u64         module_opts_type(MAYBE_UNUSED const hashconfig_t* hashconfig, MAYBE_UNUSED const user_options_t* user_options, MAYBE_UNUSED const user_options_extra_t* user_options_extra) { return OPTS_TYPE; }
-u32         module_salt_type(MAYBE_UNUSED const hashconfig_t* hashconfig, MAYBE_UNUSED const user_options_t* user_options, MAYBE_UNUSED const user_options_extra_t* user_options_extra) { return SALT_TYPE; }
-const char* module_st_hash(MAYBE_UNUSED const hashconfig_t* hashconfig, MAYBE_UNUSED const user_options_t* user_options, MAYBE_UNUSED const user_options_extra_t* user_options_extra) { return ST_HASH; }
-const char* module_st_pass(MAYBE_UNUSED const hashconfig_t* hashconfig, MAYBE_UNUSED const user_options_t* user_options, MAYBE_UNUSED const user_options_extra_t* user_options_extra) { return ST_PASS; }
+u32         module_attack_exec    (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC;     }
+u32         module_dgst_pos0      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0;       }
+u32         module_dgst_pos1      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS1;       }
+u32         module_dgst_pos2      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS2;       }
+u32         module_dgst_pos3      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS3;       }
+u32         module_dgst_size      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_SIZE;       }
+u32         module_hash_category  (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_CATEGORY;   }
+const char *module_hash_name      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_NAME;       }
+u64         module_kern_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return KERN_TYPE;       }
+u32         module_opti_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTI_TYPE;       }
+u64         module_opts_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTS_TYPE;       }
+u32         module_salt_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return SALT_TYPE;       }
+const char *module_st_hash        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH;         }
+const char *module_st_pass        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS;         }
 
-int module_hash_decode(MAYBE_UNUSED const hashconfig_t* hashconfig, MAYBE_UNUSED void* digest_buf, MAYBE_UNUSED salt_t* salt, MAYBE_UNUSED void* esalt_buf, MAYBE_UNUSED void* hook_salt_buf, MAYBE_UNUSED hashinfo_t* hash_info, const char* line_buf, MAYBE_UNUSED const int line_len)
+int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
-  u32* digest = (u32*)digest_buf;
+  u32 *digest = (u32 *) digest_buf;
 
   hc_token_t token;
 
-  memset(&token, 0, sizeof(hc_token_t));
+  memset (&token, 0, sizeof (hc_token_t));
 
-  token.token_cnt = 2;
+  token.token_cnt  = 2;
 
-  token.sep[0] = hashconfig->separator;
-  token.len[0] = 32;
-  token.attr[0] = TOKEN_ATTR_FIXED_LENGTH
-    | TOKEN_ATTR_VERIFY_HEX;
+  token.sep[0]     = hashconfig->separator;
+  token.len[0]     = 32;
+  token.attr[0]    = TOKEN_ATTR_FIXED_LENGTH
+                   | TOKEN_ATTR_VERIFY_HEX;
 
   token.len_min[1] = SALT_MIN;
   token.len_max[1] = SALT_MAX;
-  token.attr[1] = TOKEN_ATTR_VERIFY_LENGTH;
+  token.attr[1]    = TOKEN_ATTR_VERIFY_LENGTH;
 
   if (hashconfig->opts_type & OPTS_TYPE_ST_HEX)
   {
@@ -72,16 +72,16 @@ int module_hash_decode(MAYBE_UNUSED const hashconfig_t* hashconfig, MAYBE_UNUSED
     token.attr[1] |= TOKEN_ATTR_VERIFY_HEX;
   }
 
-  const int rc_tokenizer = input_tokenizer((const u8*)line_buf, line_len, &token);
+  const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token);
 
   if (rc_tokenizer != PARSER_OK) return (rc_tokenizer);
 
-  const u8* hash_pos = token.buf[0];
+  const u8 *hash_pos = token.buf[0];
 
-  digest[0] = hex_to_u32(hash_pos + 0);
-  digest[1] = hex_to_u32(hash_pos + 8);
-  digest[2] = hex_to_u32(hash_pos + 16);
-  digest[3] = hex_to_u32(hash_pos + 24);
+  digest[0] = hex_to_u32 (hash_pos +  0);
+  digest[1] = hex_to_u32 (hash_pos +  8);
+  digest[2] = hex_to_u32 (hash_pos + 16);
+  digest[3] = hex_to_u32 (hash_pos + 24);
 
   if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
   {
@@ -91,19 +91,19 @@ int module_hash_decode(MAYBE_UNUSED const hashconfig_t* hashconfig, MAYBE_UNUSED
     digest[3] -= MD5M_D;
   }
 
-  const u8* salt_pos = token.buf[1];
+  const u8 *salt_pos = token.buf[1];
   const int salt_len = token.len[1];
 
-  const bool parse_rc = generic_salt_decode(hashconfig, salt_pos, salt_len, (u8*)salt->salt_buf, (int*)&salt->salt_len);
+  const bool parse_rc = generic_salt_decode (hashconfig, salt_pos, salt_len, (u8 *) salt->salt_buf, (int *) &salt->salt_len);
 
   if (parse_rc == false) return (PARSER_SALT_LENGTH);
 
   return (PARSER_OK);
 }
 
-int module_hash_encode(MAYBE_UNUSED const hashconfig_t* hashconfig, MAYBE_UNUSED const void* digest_buf, MAYBE_UNUSED const salt_t* salt, MAYBE_UNUSED const void* esalt_buf, MAYBE_UNUSED const void* hook_salt_buf, MAYBE_UNUSED const hashinfo_t* hash_info, char* line_buf, MAYBE_UNUSED const int line_size)
+int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const void *digest_buf, MAYBE_UNUSED const salt_t *salt, MAYBE_UNUSED const void *esalt_buf, MAYBE_UNUSED const void *hook_salt_buf, MAYBE_UNUSED const hashinfo_t *hash_info, char *line_buf, MAYBE_UNUSED const int line_size)
 {
-  const u32* digest = (const u32*)digest_buf;
+  const u32 *digest = (const u32 *) digest_buf;
 
   // we can not change anything in the original buffer, otherwise destroying sorting
   // therefore create some local buffer
@@ -123,100 +123,100 @@ int module_hash_encode(MAYBE_UNUSED const hashconfig_t* hashconfig, MAYBE_UNUSED
     tmp[3] += MD5M_D;
   }
 
-  u8* out_buf = (u8*)line_buf;
+  u8 *out_buf = (u8 *) line_buf;
 
   int out_len = 0;
 
-  u32_to_hex(tmp[0], out_buf + out_len); out_len += 8;
-  u32_to_hex(tmp[1], out_buf + out_len); out_len += 8;
-  u32_to_hex(tmp[2], out_buf + out_len); out_len += 8;
-  u32_to_hex(tmp[3], out_buf + out_len); out_len += 8;
+  u32_to_hex (tmp[0], out_buf + out_len); out_len += 8;
+  u32_to_hex (tmp[1], out_buf + out_len); out_len += 8;
+  u32_to_hex (tmp[2], out_buf + out_len); out_len += 8;
+  u32_to_hex (tmp[3], out_buf + out_len); out_len += 8;
 
   out_buf[out_len] = hashconfig->separator;
 
   out_len += 1;
 
-  out_len += generic_salt_encode(hashconfig, (const u8*)salt->salt_buf, (const int)salt->salt_len, out_buf + out_len);
+  out_len += generic_salt_encode (hashconfig, (const u8 *) salt->salt_buf, (const int) salt->salt_len, out_buf + out_len);
 
   return out_len;
 }
 
-void module_init(module_ctx_t* module_ctx)
+void module_init (module_ctx_t *module_ctx)
 {
-  module_ctx->module_context_size = MODULE_CONTEXT_SIZE_CURRENT;
-  module_ctx->module_interface_version = MODULE_INTERFACE_VERSION_CURRENT;
+  module_ctx->module_context_size             = MODULE_CONTEXT_SIZE_CURRENT;
+  module_ctx->module_interface_version        = MODULE_INTERFACE_VERSION_CURRENT;
 
-  module_ctx->module_attack_exec = module_attack_exec;
-  module_ctx->module_benchmark_esalt = MODULE_DEFAULT;
-  module_ctx->module_benchmark_hook_salt = MODULE_DEFAULT;
-  module_ctx->module_benchmark_mask = MODULE_DEFAULT;
-  module_ctx->module_benchmark_charset = MODULE_DEFAULT;
-  module_ctx->module_benchmark_salt = MODULE_DEFAULT;
-  module_ctx->module_build_plain_postprocess = MODULE_DEFAULT;
-  module_ctx->module_deep_comp_kernel = MODULE_DEFAULT;
-  module_ctx->module_deprecated_notice = MODULE_DEFAULT;
-  module_ctx->module_dgst_pos0 = module_dgst_pos0;
-  module_ctx->module_dgst_pos1 = module_dgst_pos1;
-  module_ctx->module_dgst_pos2 = module_dgst_pos2;
-  module_ctx->module_dgst_pos3 = module_dgst_pos3;
-  module_ctx->module_dgst_size = module_dgst_size;
-  module_ctx->module_dictstat_disable = MODULE_DEFAULT;
-  module_ctx->module_esalt_size = MODULE_DEFAULT;
-  module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
-  module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
-  module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
-  module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
-  module_ctx->module_hash_binary_count = MODULE_DEFAULT;
-  module_ctx->module_hash_binary_parse = MODULE_DEFAULT;
-  module_ctx->module_hash_binary_save = MODULE_DEFAULT;
-  module_ctx->module_hash_decode_postprocess = MODULE_DEFAULT;
-  module_ctx->module_hash_decode_potfile = MODULE_DEFAULT;
-  module_ctx->module_hash_decode_zero_hash = MODULE_DEFAULT;
-  module_ctx->module_hash_decode = module_hash_decode;
-  module_ctx->module_hash_encode_status = MODULE_DEFAULT;
-  module_ctx->module_hash_encode_potfile = MODULE_DEFAULT;
-  module_ctx->module_hash_encode = module_hash_encode;
-  module_ctx->module_hash_init_selftest = MODULE_DEFAULT;
-  module_ctx->module_hash_mode = MODULE_DEFAULT;
-  module_ctx->module_hash_category = module_hash_category;
-  module_ctx->module_hash_name = module_hash_name;
-  module_ctx->module_hashes_count_min = MODULE_DEFAULT;
-  module_ctx->module_hashes_count_max = MODULE_DEFAULT;
-  module_ctx->module_hlfmt_disable = MODULE_DEFAULT;
-  module_ctx->module_hook_extra_param_size = MODULE_DEFAULT;
-  module_ctx->module_hook_extra_param_init = MODULE_DEFAULT;
-  module_ctx->module_hook_extra_param_term = MODULE_DEFAULT;
-  module_ctx->module_hook12 = MODULE_DEFAULT;
-  module_ctx->module_hook23 = MODULE_DEFAULT;
-  module_ctx->module_hook_salt_size = MODULE_DEFAULT;
-  module_ctx->module_hook_size = MODULE_DEFAULT;
-  module_ctx->module_jit_build_options = MODULE_DEFAULT;
-  module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
-  module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
-  module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
-  module_ctx->module_kernel_loops_max = MODULE_DEFAULT;
-  module_ctx->module_kernel_loops_min = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
-  module_ctx->module_kern_type = module_kern_type;
-  module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
-  module_ctx->module_opti_type = module_opti_type;
-  module_ctx->module_opts_type = module_opts_type;
-  module_ctx->module_outfile_check_disable = MODULE_DEFAULT;
-  module_ctx->module_outfile_check_nocomp = MODULE_DEFAULT;
-  module_ctx->module_potfile_custom_check = MODULE_DEFAULT;
-  module_ctx->module_potfile_disable = MODULE_DEFAULT;
-  module_ctx->module_potfile_keep_all_hashes = MODULE_DEFAULT;
-  module_ctx->module_pwdump_column = MODULE_DEFAULT;
-  module_ctx->module_pw_max = MODULE_DEFAULT;
-  module_ctx->module_pw_min = MODULE_DEFAULT;
-  module_ctx->module_salt_max = MODULE_DEFAULT;
-  module_ctx->module_salt_min = MODULE_DEFAULT;
-  module_ctx->module_salt_type = module_salt_type;
-  module_ctx->module_separator = MODULE_DEFAULT;
-  module_ctx->module_st_hash = module_st_hash;
-  module_ctx->module_st_pass = module_st_pass;
-  module_ctx->module_tmp_size = MODULE_DEFAULT;
-  module_ctx->module_unstable_warning = MODULE_DEFAULT;
-  module_ctx->module_warmup_disable = MODULE_DEFAULT;
+  module_ctx->module_attack_exec              = module_attack_exec;
+  module_ctx->module_benchmark_esalt          = MODULE_DEFAULT;
+  module_ctx->module_benchmark_hook_salt      = MODULE_DEFAULT;
+  module_ctx->module_benchmark_mask           = MODULE_DEFAULT;
+  module_ctx->module_benchmark_charset        = MODULE_DEFAULT;
+  module_ctx->module_benchmark_salt           = MODULE_DEFAULT;
+  module_ctx->module_build_plain_postprocess  = MODULE_DEFAULT;
+  module_ctx->module_deep_comp_kernel         = MODULE_DEFAULT;
+  module_ctx->module_deprecated_notice        = MODULE_DEFAULT;
+  module_ctx->module_dgst_pos0                = module_dgst_pos0;
+  module_ctx->module_dgst_pos1                = module_dgst_pos1;
+  module_ctx->module_dgst_pos2                = module_dgst_pos2;
+  module_ctx->module_dgst_pos3                = module_dgst_pos3;
+  module_ctx->module_dgst_size                = module_dgst_size;
+  module_ctx->module_dictstat_disable         = MODULE_DEFAULT;
+  module_ctx->module_esalt_size               = MODULE_DEFAULT;
+  module_ctx->module_extra_buffer_size        = MODULE_DEFAULT;
+  module_ctx->module_extra_tmp_size           = MODULE_DEFAULT;
+  module_ctx->module_extra_tuningdb_block     = MODULE_DEFAULT;
+  module_ctx->module_forced_outfile_format    = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_count        = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_parse        = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_save         = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_postprocess  = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_potfile      = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_zero_hash    = MODULE_DEFAULT;
+  module_ctx->module_hash_decode              = module_hash_decode;
+  module_ctx->module_hash_encode_status       = MODULE_DEFAULT;
+  module_ctx->module_hash_encode_potfile      = MODULE_DEFAULT;
+  module_ctx->module_hash_encode              = module_hash_encode;
+  module_ctx->module_hash_init_selftest       = MODULE_DEFAULT;
+  module_ctx->module_hash_mode                = MODULE_DEFAULT;
+  module_ctx->module_hash_category            = module_hash_category;
+  module_ctx->module_hash_name                = module_hash_name;
+  module_ctx->module_hashes_count_min         = MODULE_DEFAULT;
+  module_ctx->module_hashes_count_max         = MODULE_DEFAULT;
+  module_ctx->module_hlfmt_disable            = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_size    = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_init    = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_term    = MODULE_DEFAULT;
+  module_ctx->module_hook12                   = MODULE_DEFAULT;
+  module_ctx->module_hook23                   = MODULE_DEFAULT;
+  module_ctx->module_hook_salt_size           = MODULE_DEFAULT;
+  module_ctx->module_hook_size                = MODULE_DEFAULT;
+  module_ctx->module_jit_build_options        = MODULE_DEFAULT;
+  module_ctx->module_jit_cache_disable        = MODULE_DEFAULT;
+  module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
+  module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
+  module_ctx->module_kern_type                = module_kern_type;
+  module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
+  module_ctx->module_opti_type                = module_opti_type;
+  module_ctx->module_opts_type                = module_opts_type;
+  module_ctx->module_outfile_check_disable    = MODULE_DEFAULT;
+  module_ctx->module_outfile_check_nocomp     = MODULE_DEFAULT;
+  module_ctx->module_potfile_custom_check     = MODULE_DEFAULT;
+  module_ctx->module_potfile_disable          = MODULE_DEFAULT;
+  module_ctx->module_potfile_keep_all_hashes  = MODULE_DEFAULT;
+  module_ctx->module_pwdump_column            = MODULE_DEFAULT;
+  module_ctx->module_pw_max                   = MODULE_DEFAULT;
+  module_ctx->module_pw_min                   = MODULE_DEFAULT;
+  module_ctx->module_salt_max                 = MODULE_DEFAULT;
+  module_ctx->module_salt_min                 = MODULE_DEFAULT;
+  module_ctx->module_salt_type                = module_salt_type;
+  module_ctx->module_separator                = MODULE_DEFAULT;
+  module_ctx->module_st_hash                  = module_st_hash;
+  module_ctx->module_st_pass                  = module_st_pass;
+  module_ctx->module_tmp_size                 = MODULE_DEFAULT;
+  module_ctx->module_unstable_warning         = MODULE_DEFAULT;
+  module_ctx->module_warmup_disable           = MODULE_DEFAULT;
 }

From 3c921390efbae11decd177f8c9e85248908eb140 Mon Sep 17 00:00:00 2001
From: luke <92046606+dunghm19@users.noreply.github.com>
Date: Sat, 26 Apr 2025 06:48:08 +0700
Subject: [PATCH 21/83] apply style docs

---
 docs/changes.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/changes.txt b/docs/changes.txt
index eb51b703b..ec20d0f63 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -49,7 +49,7 @@
 - Added hash-mode: md5(md5($salt).md5(md5($pass)))
 - Added hash-mode: Domain Cached Credentials 2 (DCC2), MS Cache 2, (NT)
 - Added hash-mode: Domain Cached Credentials (DCC), MS Cache (NT)
-- Added hash-mode: md5(md5(md5($pass).$salt1).$salt2)	
+- Added hash-mode: md5(md5(md5($pass).$salt1).$salt2)
 - Added hash-mode: md5(md5(md5($pass)).$salt)
 - Added hash-mode: md5(sha1($pass.$salt))
 - Added hash-mode: md5(sha1($salt.$pass))

From 55f53ba07624ca44e9bc7c9943424e1f36dc21f9 Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Sun, 11 May 2025 17:16:03 +0200
Subject: [PATCH 22/83] assigned -H to --hash-info

---
 docs/changes.txt                |   2 +
 extra/tab_completion/hashcat.sh |   6 +-
 include/types.h                 |   6 +-
 src/backend.c                   |   2 +-
 src/bitmap.c                    |   2 +-
 src/combinator.c                |   2 +-
 src/cpt.c                       |   2 +-
 src/debugfile.c                 |   2 +-
 src/dictstat.c                  |   2 +-
 src/hwmon.c                     |   2 +-
 src/induct.c                    |   2 +-
 src/interface.c                 |   2 +-
 src/loopback.c                  |   2 +-
 src/main.c                      |  12 +-
 src/terminal.c                  | 224 ++++++++++++++++++++++++++------
 src/usage.c                     |   2 +-
 src/user_options.c              |  29 +++--
 17 files changed, 230 insertions(+), 71 deletions(-)

diff --git a/docs/changes.txt b/docs/changes.txt
index 50b23505f..f0f74d925 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -130,6 +130,8 @@
 - Metal Backend: allow use of devices with Metal if runtime version is >= 200
 - Metal Backend: disable Metal devices only if at least one OpenCL device is active
 - User Options: added --metal-compiler-runtime option
+- User Options: assigned -H to --hash-info
+- Hash-Info: show more details using -HH
 - Hardware Monitor: avoid sprintf in src/ext_iokit.c
 - Help: show supported hash-modes only with -hh
 - Makefile: prevent make failure with Apple Silicon in case of partial rebuild
diff --git a/extra/tab_completion/hashcat.sh b/extra/tab_completion/hashcat.sh
index f823cda72..953775345 100755
--- a/extra/tab_completion/hashcat.sh
+++ b/extra/tab_completion/hashcat.sh
@@ -425,7 +425,7 @@ _hashcat ()
   local HIDDEN_FILES_AGGRESSIVE="${HIDDEN_FILES}|hcmask|hcchr"
   local BUILD_IN_CHARSETS='?l ?u ?d ?a ?b ?s ?h ?H'
 
-  local SHORT_OPTS="-m -a -V -h -b -t -T -o -p -c -d -D -w -n -u -j -k -r -g -1 -2 -3 -4 -i -I -s -l -O -S -z -M -Y"
+  local SHORT_OPTS="-m -a -V -h -H -b -t -T -o -p -c -d -D -w -n -u -j -k -r -g -1 -2 -3 -4 -i -I -s -l -O -S -z -M -Y"
   local LONG_OPTS="--hash-type --attack-mode --version --help --quiet --benchmark --benchmark-all --hex-salt --hex-wordlist --hex-charset --force --status --status-json --status-timer --stdin-timeout-abort --machine-readable --loopback --markov-hcstat2 --markov-disable --markov-inverse --markov-classic --markov-threshold --runtime --session --speed-only --progress-only --restore --restore-file-path --restore-disable --outfile --outfile-format --outfile-autohex-disable --outfile-json --outfile-check-timer --outfile-check-dir --wordlist-autohex-disable --separator --show --deprecated-check-disable --left --username --dynamic-x --remove --remove-timer --potfile-disable --potfile-path --debug-mode --debug-file --induction-dir --segment-size --bitmap-min --bitmap-max --cpu-affinity --example-hashes --hash-info --backend-ignore-cuda --backend-ignore-opencl --backend-ignore-hip --backend-ignore-metal --backend-info --backend-devices --backend-devices-virtual --opencl-device-types --backend-vector-width --workload-profile --kernel-accel --kernel-loops --kernel-threads --spin-damp --hwmon-disable --hwmon-temp-abort --skip --limit --keyspace --rule-left --rule-right --rules-file --generate-rules --generate-rules-func-min --generate-rules-func-max --generate-rules-func-sel --generate-rules-seed --custom-charset1 --custom-charset2 --custom-charset3 --custom-charset4 --hook-threads --increment --increment-min --increment-max --logfile-disable --scrypt-tmto --keyboard-layout-mapping --truecrypt-keyfiles --veracrypt-keyfiles --veracrypt-pim-start --veracrypt-pim-stop --stdout --keep-guessing --hccapx-message-pair --nonce-error-corrections --encoding-from --encoding-to --optimized-kernel-enable --multiply-accel-disable --self-test-disable --slow-candidates --brain-server --brain-server-timer --brain-client --brain-client-features --brain-host --brain-port --brain-session --brain-session-whitelist --brain-password --identify"
   local OPTIONS="-m -a -t -o -p -c -d -w -n -u -j -k -r -g -1 -2 -3 -4 -s -l --hash-type --attack-mode --status-timer --stdin-timeout-abort --markov-hcstat2 --markov-threshold --runtime --session --outfile --outfile-format --outfile-check-timer --outfile-check-dir --separator --remove-timer --potfile-path --restore-file-path --debug-mode --debug-file --induction-dir --segment-size --bitmap-min --bitmap-max --cpu-affinity --backend-devices --backend-devices-virtual --opencl-device-types --backend-vector-width --workload-profile --kernel-accel --kernel-loops --kernel-threads --spin-damp --hwmon-temp-abort --skip --limit --rule-left --rule-right --rules-file --generate-rules --generate-rules-func-min --generate-rules-func-max --generate-rules-func-sel --generate-rules-seed --custom-charset1 --custom-charset2 --custom-charset3 --custom-charset4 --hook-threads --increment-min --increment-max --scrypt-tmto --keyboard-layout-mapping --truecrypt-keyfiles --veracrypt-keyfiles --veracrypt-pim-start --veracrypt-pim-stop --hccapx-message-pair --nonce-error-corrections --encoding-from --encoding-to --brain-server-timer --brain-client-features --brain-host --brain-password --brain-port --brain-session --brain-session-whitelist"
 
@@ -729,11 +729,11 @@ _hashcat ()
 
   while [ ${h} -le ${COMP_CWORD} ]; do
 
-    if   [[ "${COMP_WORDS[h]}" == "-a" ]]; then
+    if [[ "${COMP_WORDS[h]}" == "-a" ]]; then
 
       attack_mode=${COMP_WORDS[$((h + 1))]}
 
-    elif   [[ "${COMP_WORDS[h]}" == -a* ]]; then
+    elif [[ "${COMP_WORDS[h]}" == -a* ]]; then
 
       attack_mode=${COMP_WORDS[h]:2}
 
diff --git a/include/types.h b/include/types.h
index e6ea946f9..834bb999e 100644
--- a/include/types.h
+++ b/include/types.h
@@ -646,7 +646,7 @@ typedef enum user_options_defaults
   #else
   HWMON_TEMP_ABORT         = 90,
   #endif
-  HASH_INFO                = false,
+  HASH_INFO                = 0,
   HASH_MODE                = 0,
   HCCAPX_MESSAGE_PAIR      = 0,
   HEX_CHARSET              = false,
@@ -761,7 +761,7 @@ typedef enum user_options_map
   IDX_DYNAMIC_X                 = 0xff55,
   IDX_ENCODING_FROM             = 0xff15,
   IDX_ENCODING_TO               = 0xff16,
-  IDX_HASH_INFO                 = 0xff17,
+  IDX_HASH_INFO                 = 'H', // 0xff17
   IDX_FORCE                     = 0xff18,
   IDX_HWMON_DISABLE             = 0xff19,
   IDX_HWMON_TEMP_ABORT          = 0xff1a,
@@ -2344,7 +2344,6 @@ typedef struct user_options
   bool         deprecated_check;
   bool         dynamic_x;
   bool         hwmon;
-  bool         hash_info;
   bool         hex_charset;
   bool         hex_salt;
   bool         hex_wordlist;
@@ -2429,6 +2428,7 @@ typedef struct user_options
   #endif
   u32          debug_mode;
   u32          hwmon_temp_abort;
+  u32          hash_info;
   int          hash_mode;
   u32          hccapx_message_pair;
   u32          hook_threads;
diff --git a/src/backend.c b/src/backend.c
index 2892cd6b4..430fff33d 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -4258,8 +4258,8 @@ int backend_ctx_init (hashcat_ctx_t *hashcat_ctx)
   backend_ctx->enabled = false;
 
   if (user_options->usage      > 0)    return 0;
+  if (user_options->hash_info  > 0)    return 0;
 
-  if (user_options->hash_info == true) return 0;
   if (user_options->keyspace  == true) return 0;
   if (user_options->left      == true) return 0;
   if (user_options->show      == true) return 0;
diff --git a/src/bitmap.c b/src/bitmap.c
index 367b16a7c..1652b7cb9 100644
--- a/src/bitmap.c
+++ b/src/bitmap.c
@@ -81,8 +81,8 @@ int bitmap_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
   if (user_options->usage         > 0)    return 0;
   if (user_options->backend_info  > 0)    return 0;
+  if (user_options->hash_info     > 0)    return 0;
 
-  if (user_options->hash_info    == true) return 0;
   if (user_options->keyspace     == true) return 0;
   if (user_options->left         == true) return 0;
   if (user_options->show         == true) return 0;
diff --git a/src/combinator.c b/src/combinator.c
index 654ada03f..47715ff5e 100644
--- a/src/combinator.c
+++ b/src/combinator.c
@@ -21,8 +21,8 @@ int combinator_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
   if (user_options->usage         > 0)    return 0;
   if (user_options->backend_info  > 0)    return 0;
+  if (user_options->hash_info     > 0)    return 0;
 
-  if (user_options->hash_info    == true) return 0;
   if (user_options->left         == true) return 0;
   if (user_options->show         == true) return 0;
   if (user_options->version      == true) return 0;
diff --git a/src/cpt.c b/src/cpt.c
index 8dc393c3a..97627b8c0 100644
--- a/src/cpt.c
+++ b/src/cpt.c
@@ -17,8 +17,8 @@ int cpt_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
   if (user_options->usage         > 0)    return 0;
   if (user_options->backend_info  > 0)    return 0;
+  if (user_options->hash_info     > 0)    return 0;
 
-  if (user_options->hash_info    == true) return 0;
   if (user_options->keyspace     == true) return 0;
   if (user_options->left         == true) return 0;
   if (user_options->show         == true) return 0;
diff --git a/src/debugfile.c b/src/debugfile.c
index 20ee3ad1f..25b0a6033 100644
--- a/src/debugfile.c
+++ b/src/debugfile.c
@@ -118,9 +118,9 @@ int debugfile_init (hashcat_ctx_t *hashcat_ctx)
 
   if (user_options->usage          > 0)    return 0;
   if (user_options->backend_info   > 0)    return 0;
+  if (user_options->hash_info      > 0)    return 0;
 
   if (user_options->benchmark     == true) return 0;
-  if (user_options->hash_info     == true) return 0;
   if (user_options->keyspace      == true) return 0;
   if (user_options->left          == true) return 0;
   if (user_options->show          == true) return 0;
diff --git a/src/dictstat.c b/src/dictstat.c
index a607cf7d6..416eb90b3 100644
--- a/src/dictstat.c
+++ b/src/dictstat.c
@@ -58,9 +58,9 @@ int dictstat_init (hashcat_ctx_t *hashcat_ctx)
 
   if (user_options->usage          > 0)    return 0;
   if (user_options->backend_info   > 0)    return 0;
+  if (user_options->hash_info      > 0)    return 0;
 
   if (user_options->benchmark     == true) return 0;
-  if (user_options->hash_info     == true) return 0;
   if (user_options->keyspace      == true) return 0;
   if (user_options->left          == true) return 0;
   if (user_options->show          == true) return 0;
diff --git a/src/hwmon.c b/src/hwmon.c
index d18d12552..dffc59cd5 100644
--- a/src/hwmon.c
+++ b/src/hwmon.c
@@ -1228,8 +1228,8 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
   if (user_options->usage          > 0)     return 0;
   if (user_options->backend_info   > 0)     return 0;
+  if (user_options->hash_info      > 0)     return 0;
 
-  if (user_options->hash_info     == true)  return 0;
   if (user_options->keyspace      == true)  return 0;
   if (user_options->left          == true)  return 0;
   if (user_options->show          == true)  return 0;
diff --git a/src/induct.c b/src/induct.c
index 3d9755034..476abe958 100644
--- a/src/induct.c
+++ b/src/induct.c
@@ -41,9 +41,9 @@ int induct_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
   if (user_options->usage          > 0)    return 0;
   if (user_options->backend_info   > 0)    return 0;
+  if (user_options->hash_info      > 0)    return 0;
 
   if (user_options->benchmark     == true) return 0;
-  if (user_options->hash_info     == true) return 0;
   if (user_options->keyspace      == true) return 0;
   if (user_options->left          == true) return 0;
   if (user_options->show          == true) return 0;
diff --git a/src/interface.c b/src/interface.c
index 691e2931b..d52c0b1dc 100644
--- a/src/interface.c
+++ b/src/interface.c
@@ -355,7 +355,7 @@ int hashconfig_init (hashcat_ctx_t *hashcat_ctx)
 
     hashconfig->has_optimized_kernel = hc_path_read (source_file);
 
-    if (user_options->hash_info == false)
+    if (user_options->hash_info == 0 || user_options->hash_info > 1)
     {
       if (user_options->optimized_kernel == true)
       {
diff --git a/src/loopback.c b/src/loopback.c
index 28dcff0d0..b5799a435 100644
--- a/src/loopback.c
+++ b/src/loopback.c
@@ -62,9 +62,9 @@ int loopback_init (hashcat_ctx_t *hashcat_ctx)
 
   if (user_options->usage          > 0)    return 0;
   if (user_options->backend_info   > 0)    return 0;
+  if (user_options->hash_info      > 0)    return 0;
 
   if (user_options->benchmark     == true) return 0;
-  if (user_options->hash_info     == true) return 0;
   if (user_options->keyspace      == true) return 0;
   if (user_options->left          == true) return 0;
   if (user_options->show          == true) return 0;
diff --git a/src/main.c b/src/main.c
index 085ce38b4..f7a820cce 100644
--- a/src/main.c
+++ b/src/main.c
@@ -192,12 +192,13 @@ static void main_outerloop_starting (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MA
 
   status_ctx->shutdown_outer = false;
 
-  if (user_options->hash_info    == true) return;
+  if (user_options->backend_info  > 0)    return;
+  if (user_options->hash_info     > 0)    return;
+
   if (user_options->keyspace     == true) return;
   if (user_options->stdout_flag  == true) return;
   if (user_options->speed_only   == true) return;
   if (user_options->identify     == true) return;
-  if (user_options->backend_info  > 0)    return;
 
   if ((user_options_extra->wordlist_mode == WL_MODE_FILE) || (user_options_extra->wordlist_mode == WL_MODE_MASK))
   {
@@ -269,10 +270,11 @@ static void main_cracker_finished (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MAYB
   const user_options_t       *user_options       = hashcat_ctx->user_options;
   const user_options_extra_t *user_options_extra = hashcat_ctx->user_options_extra;
 
-  if (user_options->hash_info    == true) return;
+  if (user_options->backend_info  > 0)    return;
+  if (user_options->hash_info     > 0)    return;
+
   if (user_options->keyspace     == true) return;
   if (user_options->stdout_flag  == true) return;
-  if (user_options->backend_info  > 0)    return;
 
   // if we had a prompt, clear it
 
@@ -1328,7 +1330,7 @@ int main (int argc, char **argv)
 
       rc_final = 0;
     }
-    else if (user_options->hash_info == true)
+    else if (user_options->hash_info > 0)
     {
       hash_info (hashcat_ctx);
 
diff --git a/src/terminal.c b/src/terminal.c
index 211a8b24f..9bcbdb4cf 100644
--- a/src/terminal.c
+++ b/src/terminal.c
@@ -87,7 +87,7 @@ void welcome_screen (hashcat_ctx_t *hashcat_ctx, const char *version_tag)
     event_log_info (hashcat_ctx, "%s (%s) starting in autodetect mode", PROGNAME, version_tag);
     event_log_info (hashcat_ctx, NULL);
   }
-  else if (user_options->hash_info == true)
+  else if (user_options->hash_info > 0)
   {
     event_log_info (hashcat_ctx, "%s (%s) starting in hash-info mode", PROGNAME, version_tag);
     event_log_info (hashcat_ctx, NULL);
@@ -634,7 +634,7 @@ void compress_terminal_line_length (char *out_buf, const size_t keep_from_beginn
   *ptr1 = 0;
 }
 
-void json_encode (char *text, char *escaped)
+void json_encode (const char *text, char *escaped)
 {
   /*
    * Based on https://www.freeformatter.com/json-escape.html, below these 7 different chars
@@ -667,6 +667,8 @@ void json_encode (char *text, char *escaped)
 
 void hash_info_single_json (hashcat_ctx_t *hashcat_ctx, user_options_extra_t *user_options_extra)
 {
+  const user_options_t *user_options = hashcat_ctx->user_options;
+
   if (hashconfig_init (hashcat_ctx) == 0)
   {
     hashconfig_t *hashconfig = hashcat_ctx->hashconfig;
@@ -677,25 +679,70 @@ void hash_info_single_json (hashcat_ctx_t *hashcat_ctx, user_options_extra_t *us
     printf ("\"category\": \"%s\", ", strhashcategory (hashconfig->hash_category));
     printf ("\"slow_hash\": %s, ", (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) ? "false" : "true");
 
-    printf ("\"password_len_min\": %u, ", hashconfig->pw_min);
-    printf ("\"password_len_max\": %u, ", hashconfig->pw_max);
-
     printf ("\"is_deprecated\": %s, ", (module_ctx->module_deprecated_notice != MODULE_DEFAULT) ? "true" : "false");
 
-    if (module_ctx->module_deprecated_notice != MODULE_DEFAULT) {
-      const char *deprecated_notice = module_ctx->module_deprecated_notice (hashconfig, hashcat_ctx->user_options, user_options_extra);
-      printf ("\"deprecated_notice\": \"%s\", ", deprecated_notice);
+    if (module_ctx->module_deprecated_notice != MODULE_DEFAULT)
+    {
+      const char *t_deprecated_notice = module_ctx->module_deprecated_notice (hashconfig, hashcat_ctx->user_options, user_options_extra);
+
+      char *t_deprecated_notice_json_encoded = (char *) hcmalloc (strlen (t_deprecated_notice) * 2);
+
+      json_encode (t_deprecated_notice, t_deprecated_notice_json_encoded);
+
+      printf ("\"deprecated_notice\": \"%s\", ", t_deprecated_notice_json_encoded);
+
+      hcfree (t_deprecated_notice_json_encoded);
     }
+    else
+    {
+      printf ("\"deprecated_notice\": \"%s\", ", "N/A");
+    }
+
+    const char *t_pw_desc = (hashconfig->opts_type & OPTS_TYPE_PT_HEX) ? "HEX" : "plain";
+
+    u32 t_pw_min = hashconfig->pw_min;
+    u32 t_pw_max = hashconfig->pw_max;
+
+    if (user_options->hash_info > 1)
+    {
+      if (hashconfig->opts_type & OPTS_TYPE_PT_HEX)
+      {
+        t_pw_min *= 2;
+        t_pw_max *= 2;
+      }
+    }
+
+    printf ("\"password_type\": %s, ", t_pw_desc);
+    printf ("\"password_len_min\": %u, ", t_pw_min);
+    printf ("\"password_len_max\": %u, ", t_pw_max);
 
     printf ("\"is_salted\": %s, ", (hashconfig->is_salted == true) ? "true" : "false");
 
     if (hashconfig->is_salted == true)
     {
       u32 t = hashconfig->salt_type;
-      const char *t_desc = (t == SALT_TYPE_EMBEDDED) ? "embedded" : (t == SALT_TYPE_GENERIC) ? "generic" : "virtual";
-      printf ("\"salt_type\": \"%s\", ", t_desc);
-      printf ("\"salt_len_min\": %u, ", hashconfig->salt_min);
-      printf ("\"salt_len_max\": %u, ", hashconfig->salt_max);
+
+      const char *t_salt_desc = (t == SALT_TYPE_EMBEDDED) ? "embedded" : (t == SALT_TYPE_GENERIC) ? "generic" : "virtual";
+
+      printf ("\"salt_type\": \"%s\", ", t_salt_desc);
+
+      if (hashconfig->salt_type == SALT_TYPE_GENERIC || hashconfig->salt_type == SALT_TYPE_EMBEDDED)
+      {
+        u32 t_salt_min = hashconfig->salt_min;
+        u32 t_salt_max = hashconfig->salt_max;
+
+        if (user_options->hash_info > 1)
+        {
+          if (hashconfig->opts_type & OPTS_TYPE_ST_HEX)
+          {
+            t_salt_min *= 2;
+            t_salt_max *= 2;
+          }
+        }
+
+        printf ("\"salt_len_min\": %u, ", t_salt_min);
+        printf ("\"salt_len_max\": %u, ", t_salt_max);
+      }
     }
 
     if ((hashconfig->has_pure_kernel) && (hashconfig->has_optimized_kernel))
@@ -711,6 +758,39 @@ void hash_info_single_json (hashcat_ctx_t *hashcat_ctx, user_options_extra_t *us
       printf ("\"kernel_type\": %s, ", "[ \"optimized\" ]");
     }
 
+    if (user_options->hash_info > 1)
+    {
+      if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
+      {
+        printf ("\"kernel_type_filter\": %s, ", "[ \"optimized\" ]");
+      }
+      else
+      {
+        printf ("\"kernel_type_filter\": %s, ", "[ \"pure\" ]");
+      }
+
+      printf ("\"attack_mode_filter\": %d, ", user_options->attack_mode);
+
+      // almost always 1 and -1
+      printf ("\"hashes_count_min\": %d, ", hashconfig->hashes_count_min);
+      printf ("\"hashes_count_max\": %d, ", hashconfig->hashes_count_max);
+
+      if (hashconfig->salt_type == SALT_TYPE_GENERIC || hashconfig->salt_type == SALT_TYPE_EMBEDDED)
+      {
+        bool multi_hash_same_salt = true;
+
+        if ((hashconfig->opts_type & OPTS_TYPE_DEEP_COMP_KERNEL) == 0)
+        {
+          if (hashconfig->attack_exec == ATTACK_EXEC_OUTSIDE_KERNEL)
+          {
+            multi_hash_same_salt = false;
+          }
+        }
+
+        printf ("\"hashes_with_same_salt\": %s, ", (multi_hash_same_salt == true) ? "true" : "false");
+      }
+    }
+
     if ((hashconfig->st_hash != NULL) && (hashconfig->st_pass != NULL))
     {
       if (hashconfig->opts_type & OPTS_TYPE_BINARY_HASHFILE)
@@ -731,7 +811,7 @@ void hash_info_single_json (hashcat_ctx_t *hashcat_ctx, user_options_extra_t *us
 
       char *example_hash_json_encoded = (char *) hcmalloc (strlen (hashconfig->st_hash) * 2);
 
-      json_encode ((char *)hashconfig->st_hash, example_hash_json_encoded);
+      json_encode (hashconfig->st_hash, example_hash_json_encoded);
 
       printf ("\"example_hash\": \"%s\", ", example_hash_json_encoded);
 
@@ -807,6 +887,7 @@ void hash_info_single_json (hashcat_ctx_t *hashcat_ctx, user_options_extra_t *us
     printf ("\"autodetect_enabled\": %s, ", (hashconfig->opts_type & OPTS_TYPE_AUTODETECT_DISABLE) ? "false" : "true");
     printf ("\"self_test_enabled\": %s, ", (hashconfig->opts_type & OPTS_TYPE_SELF_TEST_DISABLE) ? "false" : "true");
     printf ("\"potfile_enabled\": %s, ", (hashconfig->opts_type & OPTS_TYPE_POTFILE_NOPASS) ? "false" : "true");
+    printf ("\"keep_guessing\": %s, ", (hashconfig->opts_type & OPTS_TYPE_SUGGEST_KG) ? "true" : "false");
     printf ("\"custom_plugin\": %s, ", (hashconfig->opts_type & OPTS_TYPE_STOCK_MODULE) ? "false" : "true");
 
     if (hashconfig->opts_type & OPTS_TYPE_PT_ALWAYS_ASCII)
@@ -821,8 +902,6 @@ void hash_info_single_json (hashcat_ctx_t *hashcat_ctx, user_options_extra_t *us
     {
       printf ("\"plaintext_encoding\": %s", "[ \"ASCII\", \"HEX\" ]");
     }
-
-    event_log_info (hashcat_ctx, NULL);
   }
 
   printf (" }");
@@ -832,30 +911,73 @@ void hash_info_single_json (hashcat_ctx_t *hashcat_ctx, user_options_extra_t *us
 
 void hash_info_single (hashcat_ctx_t *hashcat_ctx, user_options_extra_t *user_options_extra)
 {
+  const user_options_t *user_options = hashcat_ctx->user_options;
+
   if (hashconfig_init (hashcat_ctx) == 0)
   {
     hashconfig_t *hashconfig = hashcat_ctx->hashconfig;
+    module_ctx_t *module_ctx = hashcat_ctx->module_ctx;
 
     event_log_info (hashcat_ctx, "Hash mode #%u", hashconfig->hash_mode);
     event_log_info (hashcat_ctx, "  Name................: %s", hashconfig->hash_name);
     event_log_info (hashcat_ctx, "  Category............: %s", strhashcategory (hashconfig->hash_category));
     event_log_info (hashcat_ctx, "  Slow.Hash...........: %s", (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) ? "No" : "Yes");
 
-    event_log_info (hashcat_ctx, "  Password.Len.Min....: %u", hashconfig->pw_min);
-    event_log_info (hashcat_ctx, "  Password.Len.Max....: %u", hashconfig->pw_max);
+    event_log_info (hashcat_ctx, "  Deprecated..........: %s", (module_ctx->module_deprecated_notice != MODULE_DEFAULT) ? "Yes" : "No");
+
+    char *t_deprecated_notice = "N/A\0";
+
+    if (module_ctx->module_deprecated_notice != MODULE_DEFAULT)
+    {
+      t_deprecated_notice = (char *) module_ctx->module_deprecated_notice (hashconfig, hashcat_ctx->user_options, user_options_extra);
+    }
+
+    event_log_info (hashcat_ctx, "  Deprecated.Notice...: %s", t_deprecated_notice);
+
+    const char *t_pw_desc = (hashconfig->opts_type & OPTS_TYPE_PT_HEX) ? "HEX\0" : "plain\0";
+
+    u32 t_pw_min = hashconfig->pw_min;
+    u32 t_pw_max = hashconfig->pw_max;
+
+    if (user_options->hash_info > 1)
+    {
+      if (hashconfig->opts_type & OPTS_TYPE_PT_HEX)
+      {
+        t_pw_min *= 2;
+        t_pw_max *= 2;
+      }
+    }
+
+    event_log_info (hashcat_ctx, "  Password.Type.......: %s", t_pw_desc);
+    event_log_info (hashcat_ctx, "  Password.Len.Min....: %u", t_pw_min);
+    event_log_info (hashcat_ctx, "  Password.Len.Max....: %u", t_pw_max);
 
     if (hashconfig->is_salted == true)
     {
       u32 t = hashconfig->salt_type;
-      const char *t_desc = (t == SALT_TYPE_EMBEDDED) ? "Embedded\0" : (t == SALT_TYPE_GENERIC) ? "Generic\0" : "Virtual\0";
-      event_log_info (hashcat_ctx, "  Salt.Type...........: %s", t_desc);
-      event_log_info (hashcat_ctx, "  Salt.Len.Min........: %u", hashconfig->salt_min);
-      event_log_info (hashcat_ctx, "  Salt.Len.Max........: %u", hashconfig->salt_max);
-    }
 
-    // almost always 1 and -1
-    //event_log_info (hashcat_ctx, "  Hashes.Count.Min....: %d", hashconfig->hashes_count_min);
-    //event_log_info (hashcat_ctx, "  Hashes.Count.Max....: %u", hashconfig->hashes_count_max);
+      const char *t_salt_desc = (t == SALT_TYPE_EMBEDDED) ? "Embedded\0" : (t == SALT_TYPE_GENERIC) ? "Generic\0" : "Virtual\0";
+
+      event_log_info (hashcat_ctx, "  Salt.Type...........: %s", t_salt_desc);
+
+      if (hashconfig->salt_type == SALT_TYPE_GENERIC || hashconfig->salt_type == SALT_TYPE_EMBEDDED)
+      {
+        u32 t_salt_min = hashconfig->salt_min;
+        u32 t_salt_max = hashconfig->salt_max;
+
+        if (user_options->hash_info > 1)
+        {
+          if (hashconfig->opts_type & OPTS_TYPE_ST_HEX)
+          {
+            t_salt_min *= 2;
+            t_salt_max *= 2;
+          }
+        }
+
+        event_log_info (hashcat_ctx, "  Salt.Len.Min........: %u", t_salt_min);
+        event_log_info (hashcat_ctx, "  Salt.Len.Max........: %u", t_salt_max);
+      }
+    }
 
     if ((hashconfig->has_pure_kernel) && (hashconfig->has_optimized_kernel))
     {
@@ -870,6 +992,39 @@ void hash_info_single (hashcat_ctx_t *hashcat_ctx, user_options_extra_t *user_op
       event_log_info (hashcat_ctx, "  Kernel.Type(s)......: optimized");
     }
 
+    if (user_options->hash_info > 1)
+    {
+      if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
+      {
+        event_log_info (hashcat_ctx, "  Kernel.Type.Filter..: optimized");
+      }
+      else
+      {
+        event_log_info (hashcat_ctx, "  Kernel.Type.Filter..: pure");
+      }
+
+      event_log_info (hashcat_ctx, "  Attack.Mode.Filter..: %u", user_options->attack_mode);
+
+      // almost always 1 and -1
+      event_log_info (hashcat_ctx, "  Hashes.Count.Min....: %d", hashconfig->hashes_count_min);
+      event_log_info (hashcat_ctx, "  Hashes.Count.Max....: %d", hashconfig->hashes_count_max);
+
+      if (hashconfig->salt_type == SALT_TYPE_GENERIC || hashconfig->salt_type == SALT_TYPE_EMBEDDED)
+      {
+        bool multi_hash_same_salt = true;
+
+        if ((hashconfig->opts_type & OPTS_TYPE_DEEP_COMP_KERNEL) == 0)
+        {
+          if (hashconfig->attack_exec == ATTACK_EXEC_OUTSIDE_KERNEL)
+          {
+            multi_hash_same_salt = false;
+          }
+        }
+
+        event_log_info (hashcat_ctx, "  Hashes.w/.Same.Salt.: %s", (multi_hash_same_salt == true) ? "Allowed" : "Not allowed");
+      }
+    }
+
     if ((hashconfig->st_hash != NULL) && (hashconfig->st_pass != NULL))
     {
       if (hashconfig->opts_type & OPTS_TYPE_BINARY_HASHFILE)
@@ -973,6 +1128,7 @@ void hash_info_single (hashcat_ctx_t *hashcat_ctx, user_options_extra_t *user_op
     event_log_info (hashcat_ctx, "  Autodetect.Enabled..: %s", (hashconfig->opts_type & OPTS_TYPE_AUTODETECT_DISABLE) ? "No" : "Yes");
     event_log_info (hashcat_ctx, "  Self.Test.Enabled...: %s", (hashconfig->opts_type & OPTS_TYPE_SELF_TEST_DISABLE) ? "No" : "Yes");
     event_log_info (hashcat_ctx, "  Potfile.Enabled.....: %s", (hashconfig->opts_type & OPTS_TYPE_POTFILE_NOPASS) ? "No" : "Yes");
+    event_log_info (hashcat_ctx, "  Keep.Guessing.......: %s", (hashconfig->opts_type & OPTS_TYPE_SUGGEST_KG) ? "Yes" : "No");
     event_log_info (hashcat_ctx, "  Custom.Plugin.......: %s", (hashconfig->opts_type & OPTS_TYPE_STOCK_MODULE) ? "No" : "Yes");
 
     if (hashconfig->opts_type & OPTS_TYPE_PT_ALWAYS_ASCII)
@@ -1915,7 +2071,7 @@ void status_display_status_json (hashcat_ctx_t *hashcat_ctx)
       printf (",");
     }
 
-    printf (" { \"device_id\": %u,", device_id + 1);
+    printf (" { \"device_id\": %d,", device_id + 1);
 
     char *device_name_json_encoded = (char *) hcmalloc (strlen (device_info->device_name) * 2);
 
@@ -2419,18 +2575,10 @@ void status_display (hashcat_ctx_t *hashcat_ctx)
     }
     else
     {
-      event_log_info (hashcat_ctx,
-        "Remaining........: %u (%.2f%%) Digests",
-        digests_remain,
-        digests_remain_percent);
+      event_log_info (hashcat_ctx, "Remaining........: %u (%.2f%%) Digests", digests_remain, digests_remain_percent);
     }
-  }
 
-  if (hashcat_status->digests_cnt > 1000)
-  {
-    event_log_info (hashcat_ctx,
-      "Recovered/Time...: %s",
-      hashcat_status->cpt);
+    event_log_info (hashcat_ctx, "Recovered/Time...: %s", hashcat_status->cpt);
   }
 
   switch (hashcat_status->progress_mode)
@@ -2754,7 +2902,7 @@ void status_speed_json (hashcat_ctx_t *hashcat_ctx)
       printf (",");
     }
 
-    printf (" { \"device_id\": %u,", device_id + 1);
+    printf (" { \"device_id\": %d,", device_id + 1);
     printf (" \"speed\": %" PRIu64 " }", (u64) (device_info->hashes_msec_dev_benchmark * 1000));
     device_num++;
   }
@@ -2871,7 +3019,7 @@ void status_progress_json (hashcat_ctx_t *hashcat_ctx)
       printf (",");
     }
 
-    printf (" { \"device_id\": %u,", device_id + 1);
+    printf (" { \"device_id\": %d,", device_id + 1);
     printf (" \"progress\": %" PRIu64 ",", device_info->progress_dev);
     printf (" \"runtime\": %0.2f }", device_info->runtime_msec_dev);
     device_num++;
diff --git a/src/usage.c b/src/usage.c
index 7cbf22348..b72739f51 100644
--- a/src/usage.c
+++ b/src/usage.c
@@ -94,7 +94,7 @@ static const char *const USAGE_BIG_PRE_HASHMODES[] =
   "     --bitmap-max               | Num  | Sets maximum bits allowed for bitmaps to X           | --bitmap-max=24",
   "     --cpu-affinity             | Str  | Locks to CPU devices, separated with commas          | --cpu-affinity=1,2,3",
   "     --hook-threads             | Num  | Sets number of threads for a hook (per compute unit) | --hook-threads=8",
-  "     --hash-info                |      | Show information for each hash-mode                  |",
+  " -H, --hash-info                |      | Show information for each hash-mode                  | -H or -HH",
   "     --example-hashes           |      | Alias of --hash-info                                 |",
   "     --backend-ignore-cuda      |      | Do not try to open CUDA interface on startup         |",
   "     --backend-ignore-hip       |      | Do not try to open HIP interface on startup          |",
diff --git a/src/user_options.c b/src/user_options.c
index b1ed588f4..7adbcf345 100644
--- a/src/user_options.c
+++ b/src/user_options.c
@@ -21,9 +21,9 @@
 #endif
 
 #ifdef WITH_BRAIN
-static const char *const short_options = "hVvm:a:r:j:k:g:o:t:d:D:n:u:T:c:p:s:l:1:2:3:4:iIbw:OMSY:z";
+static const char *const short_options = "hHVvm:a:r:j:k:g:o:t:d:D:n:u:T:c:p:s:l:1:2:3:4:iIbw:OMSY:z";
 #else
-static const char *const short_options = "hVvm:a:r:j:k:g:o:t:d:D:n:u:T:c:p:s:l:1:2:3:4:iIbw:OMSY:";
+static const char *const short_options = "hHVvm:a:r:j:k:g:o:t:d:D:n:u:T:c:p:s:l:1:2:3:4:iIbw:OMSY:";
 #endif
 
 static char *const SEPARATOR = ":";
@@ -415,7 +415,7 @@ int user_options_getopt (hashcat_ctx_t *hashcat_ctx, int argc, char **argv)
       case IDX_ENCODING_TO:               user_options->encoding_to               = optarg;                          break;
       case IDX_INDUCTION_DIR:             user_options->induction_dir             = optarg;                          break;
       case IDX_OUTFILE_CHECK_DIR:         user_options->outfile_check_dir         = optarg;                          break;
-      case IDX_HASH_INFO:                 user_options->hash_info                 = true;                            break;
+      case IDX_HASH_INFO:                 user_options->hash_info++;                                                 break;
       case IDX_FORCE:                     user_options->force                     = true;                            break;
       case IDX_SELF_TEST_DISABLE:         user_options->self_test                 = false;                           break;
       case IDX_SKIP:                      user_options->skip                      = hc_strtoull (optarg, NULL, 10);
@@ -1291,7 +1291,7 @@ int user_options_sanity (hashcat_ctx_t *hashcat_ctx)
       return -1;
     }
 
-    if (user_options->hash_info == true)
+    if (user_options->hash_info > 0)
     {
       event_log_error (hashcat_ctx, "Use of --hash-info is not allowed in benchmark mode.");
 
@@ -1476,6 +1476,13 @@ int user_options_sanity (hashcat_ctx_t *hashcat_ctx)
     return -1;
   }
 
+  if (user_options->hash_info > 2)
+  {
+    event_log_error (hashcat_ctx, "Invalid --hash-info/-H value, must have a value greater or equal to 0 and lower than 3.");
+
+    return -1;
+  }
+
   #ifdef WITH_BRAIN
   if ((user_options->brain_client == true) && (user_options->remove == true))
   {
@@ -1570,7 +1577,7 @@ int user_options_sanity (hashcat_ctx_t *hashcat_ctx)
       show_error = false;
     }
   }
-  else if (user_options->hash_info == true)
+  else if (user_options->hash_info > 0)
   {
     if (user_options->hc_argc == 0)
     {
@@ -1768,7 +1775,7 @@ void user_options_session_auto (hashcat_ctx_t *hashcat_ctx)
       user_options->session = "benchmark";
     }
 
-    if (user_options->hash_info == true)
+    if (user_options->hash_info > 0)
     {
       user_options->session = "hash_info";
     }
@@ -1851,12 +1858,12 @@ void user_options_preprocess (hashcat_ctx_t *hashcat_ctx)
     user_options->bitmap_max          = 1;
   }
 
-  if (user_options->hash_info        == true
-   || user_options->keyspace         == true
+  if (user_options->keyspace         == true
    || user_options->speed_only       == true
    || user_options->progress_only    == true
    || user_options->identify         == true
    || user_options->usage             > 0
+   || user_options->hash_info         > 0
    || user_options->backend_info      > 0)
   {
     user_options->hwmon               = false;
@@ -1908,7 +1915,7 @@ void user_options_preprocess (hashcat_ctx_t *hashcat_ctx)
     }
   }
 
-  if (user_options->hash_info == true)
+  if (user_options->hash_info > 0)
   {
     user_options->quiet = true;
   }
@@ -2022,7 +2029,7 @@ void user_options_preprocess (hashcat_ctx_t *hashcat_ctx)
 
   if (user_options->attack_mode == ATTACK_MODE_BF)
   {
-    if (user_options->hash_info == true)
+    if (user_options->hash_info > 0)
     {
 
     }
@@ -2278,7 +2285,7 @@ void user_options_extra_init (hashcat_ctx_t *hashcat_ctx)
   {
 
   }
-  else if (user_options->hash_info == true)
+  else if (user_options->hash_info > 0)
   {
 
   }

From 5535077722a3f1f262ad98d599160767c8194489 Mon Sep 17 00:00:00 2001
From: luke <92046606+dunghm19@users.noreply.github.com>
Date: Tue, 13 May 2025 10:45:28 +0700
Subject: [PATCH 23/83] Update test module for m33100 plugin

---
 tools/test_modules/m33100.pm | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/test_modules/m33100.pm b/tools/test_modules/m33100.pm
index 03d37d07f..1b5f71637 100644
--- a/tools/test_modules/m33100.pm
+++ b/tools/test_modules/m33100.pm
@@ -1,7 +1,7 @@
 #!/usr/bin/env perl
 
 ##
-## Author......: Custom Hashcat Test
+## Author......: See docs/credits.txt
 ## License.....: MIT
 ##
 
@@ -17,7 +17,7 @@ sub module_generate_hash
   my $word = shift;
   my $salt = shift;
 
-  my $digest   = md5_hex($salt . md5_hex($word) . $salt);
+  my $digest = md5_hex ($salt . md5_hex ($word) .$salt);
 
   my $hash = sprintf ("%s:%s", $digest, $salt);
 

From 4e71fb00d6f5e9ae134aed8eb2c110ac1fc28e21 Mon Sep 17 00:00:00 2001
From: Technion <technion@lolware.net>
Date: Fri, 16 May 2025 19:54:17 +1000
Subject: [PATCH 24/83] This python script presents a syntax error when
 executed: vmwarevmx2hashcat.py:17: SyntaxWarning: invalid escape sequence
 '\)'   ks_re =
 '.+phrase/(.*?)/pass2key=(.*?):cipher=(.*?):rounds=(.*?):salt=(.*?),(.*?),(.*?)\)'

This fixes that error.
---
 tools/vmwarevmx2hashcat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/vmwarevmx2hashcat.py b/tools/vmwarevmx2hashcat.py
index 015b2a614..f8a004e12 100755
--- a/tools/vmwarevmx2hashcat.py
+++ b/tools/vmwarevmx2hashcat.py
@@ -14,7 +14,7 @@ from binascii import hexlify
 import re
 import base64
 
-ks_re = '.+phrase/(.*?)/pass2key=(.*?):cipher=(.*?):rounds=(.*?):salt=(.*?),(.*?),(.*?)\)'
+ks_re = '.+phrase/(.*?)/pass2key=(.*?):cipher=(.*?):rounds=(.*?):salt=(.*?),(.*?),(.*?)\\)'
 
 ks_struct = {
     'password_hash': None,

From 31a19b9acfea2c2e322752b3fb502256e1250c7a Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Mon, 26 May 2025 20:28:13 +0200
Subject: [PATCH 25/83] Added hash-modes: RIPEMD-320, HMAC-RIPEMD320 (key =
 $pass), HMAC-RIPEMD320 (key = $salt)

---
 OpenCL/inc_hash_ripemd320.cl  | 2279 +++++++++++++++++++++++++++++++++
 OpenCL/inc_hash_ripemd320.h   |  147 +++
 OpenCL/inc_types.h            |  196 +++
 OpenCL/m33600_a0-optimized.cl |  225 ++++
 OpenCL/m33600_a0-pure.cl      |  118 ++
 OpenCL/m33600_a1-optimized.cl |  339 +++++
 OpenCL/m33600_a1-pure.cl      |  112 ++
 OpenCL/m33600_a3-optimized.cl |  447 +++++++
 OpenCL/m33600_a3-pure.cl      |  138 ++
 OpenCL/m33650_a0-pure.cl      |  135 ++
 OpenCL/m33650_a1-pure.cl      |  183 +++
 OpenCL/m33650_a3-pure.cl      |  155 +++
 OpenCL/m33660_a0-pure.cl      |  139 ++
 OpenCL/m33660_a1-pure.cl      |  187 +++
 OpenCL/m33660_a3-pure.cl      |  159 +++
 docs/changes.txt              |    3 +
 docs/readme.txt               |    3 +
 include/types.h               |    1 +
 src/modules/module_33600.c    |  196 +++
 src/modules/module_33650.c    |  223 ++++
 src/modules/module_33660.c    |  223 ++++
 tools/test_modules/m33600.pm  |   42 +
 tools/test_modules/m33650.pm  |   45 +
 tools/test_modules/m33660.pm  |   45 +
 24 files changed, 5740 insertions(+)
 create mode 100644 OpenCL/inc_hash_ripemd320.cl
 create mode 100644 OpenCL/inc_hash_ripemd320.h
 create mode 100644 OpenCL/m33600_a0-optimized.cl
 create mode 100644 OpenCL/m33600_a0-pure.cl
 create mode 100644 OpenCL/m33600_a1-optimized.cl
 create mode 100644 OpenCL/m33600_a1-pure.cl
 create mode 100644 OpenCL/m33600_a3-optimized.cl
 create mode 100644 OpenCL/m33600_a3-pure.cl
 create mode 100644 OpenCL/m33650_a0-pure.cl
 create mode 100644 OpenCL/m33650_a1-pure.cl
 create mode 100644 OpenCL/m33650_a3-pure.cl
 create mode 100644 OpenCL/m33660_a0-pure.cl
 create mode 100644 OpenCL/m33660_a1-pure.cl
 create mode 100644 OpenCL/m33660_a3-pure.cl
 create mode 100644 src/modules/module_33600.c
 create mode 100644 src/modules/module_33650.c
 create mode 100644 src/modules/module_33660.c
 create mode 100644 tools/test_modules/m33600.pm
 create mode 100644 tools/test_modules/m33650.pm
 create mode 100644 tools/test_modules/m33660.pm

diff --git a/OpenCL/inc_hash_ripemd320.cl b/OpenCL/inc_hash_ripemd320.cl
new file mode 100644
index 000000000..96276042b
--- /dev/null
+++ b/OpenCL/inc_hash_ripemd320.cl
@@ -0,0 +1,2279 @@
+
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#include "inc_vendor.h"
+#include "inc_types.h"
+#include "inc_platform.h"
+#include "inc_common.h"
+#include "inc_hash_ripemd320.h"
+
+// important notes on this:
+// input buf unused bytes needs to be set to zero
+// input buf needs to be in algorithm native byte order (ripemd320 = LE, sha1 = BE, etc)
+// input buf needs to be 64 byte aligned when using ripemd320_update()
+
+DECLSPEC void ripemd320_transform (PRIVATE_AS const u32 *w0, PRIVATE_AS const u32 *w1, PRIVATE_AS const u32 *w2, PRIVATE_AS const u32 *w3, PRIVATE_AS u32 *digest)
+{
+  u32 a1 = digest[0];
+  u32 b1 = digest[1];
+  u32 c1 = digest[2];
+  u32 d1 = digest[3];
+  u32 e1 = digest[4];
+
+  u32 a2 = digest[5];
+  u32 b2 = digest[6];
+  u32 c2 = digest[7];
+  u32 d2 = digest[8];
+  u32 e2 = digest[9];
+
+  u32 tmp = 0;
+
+  RIPEMD320_STEP_S (RIPEMD320_F , a1, b1, c1, d1, e1, w0[0], RIPEMD320C00, RIPEMD320S00);
+  RIPEMD320_STEP_S (RIPEMD320_F , e1, a1, b1, c1, d1, w0[1], RIPEMD320C00, RIPEMD320S01);
+  RIPEMD320_STEP_S (RIPEMD320_F , d1, e1, a1, b1, c1, w0[2], RIPEMD320C00, RIPEMD320S02);
+  RIPEMD320_STEP_S (RIPEMD320_F , c1, d1, e1, a1, b1, w0[3], RIPEMD320C00, RIPEMD320S03);
+  RIPEMD320_STEP_S (RIPEMD320_F , b1, c1, d1, e1, a1, w1[0], RIPEMD320C00, RIPEMD320S04);
+  RIPEMD320_STEP_S (RIPEMD320_F , a1, b1, c1, d1, e1, w1[1], RIPEMD320C00, RIPEMD320S05);
+  RIPEMD320_STEP_S (RIPEMD320_F , e1, a1, b1, c1, d1, w1[2], RIPEMD320C00, RIPEMD320S06);
+  RIPEMD320_STEP_S (RIPEMD320_F , d1, e1, a1, b1, c1, w1[3], RIPEMD320C00, RIPEMD320S07);
+  RIPEMD320_STEP_S (RIPEMD320_F , c1, d1, e1, a1, b1, w2[0], RIPEMD320C00, RIPEMD320S08);
+  RIPEMD320_STEP_S (RIPEMD320_F , b1, c1, d1, e1, a1, w2[1], RIPEMD320C00, RIPEMD320S09);
+  RIPEMD320_STEP_S (RIPEMD320_F , a1, b1, c1, d1, e1, w2[2], RIPEMD320C00, RIPEMD320S0A);
+  RIPEMD320_STEP_S (RIPEMD320_F , e1, a1, b1, c1, d1, w2[3], RIPEMD320C00, RIPEMD320S0B);
+  RIPEMD320_STEP_S (RIPEMD320_F , d1, e1, a1, b1, c1, w3[0], RIPEMD320C00, RIPEMD320S0C);
+  RIPEMD320_STEP_S (RIPEMD320_F , c1, d1, e1, a1, b1, w3[1], RIPEMD320C00, RIPEMD320S0D);
+  RIPEMD320_STEP_S (RIPEMD320_F , b1, c1, d1, e1, a1, w3[2], RIPEMD320C00, RIPEMD320S0E);
+  RIPEMD320_STEP_S (RIPEMD320_F , a1, b1, c1, d1, e1, w3[3], RIPEMD320C00, RIPEMD320S0F);
+
+  RIPEMD320_STEP_S (RIPEMD320_J , a2, b2, c2, d2, e2, w1[1], RIPEMD320C50, RIPEMD320S50);
+  RIPEMD320_STEP_S (RIPEMD320_J , e2, a2, b2, c2, d2, w3[2], RIPEMD320C50, RIPEMD320S51);
+  RIPEMD320_STEP_S (RIPEMD320_J , d2, e2, a2, b2, c2, w1[3], RIPEMD320C50, RIPEMD320S52);
+  RIPEMD320_STEP_S (RIPEMD320_J , c2, d2, e2, a2, b2, w0[0], RIPEMD320C50, RIPEMD320S53);
+  RIPEMD320_STEP_S (RIPEMD320_J , b2, c2, d2, e2, a2, w2[1], RIPEMD320C50, RIPEMD320S54);
+  RIPEMD320_STEP_S (RIPEMD320_J , a2, b2, c2, d2, e2, w0[2], RIPEMD320C50, RIPEMD320S55);
+  RIPEMD320_STEP_S (RIPEMD320_J , e2, a2, b2, c2, d2, w2[3], RIPEMD320C50, RIPEMD320S56);
+  RIPEMD320_STEP_S (RIPEMD320_J , d2, e2, a2, b2, c2, w1[0], RIPEMD320C50, RIPEMD320S57);
+  RIPEMD320_STEP_S (RIPEMD320_J , c2, d2, e2, a2, b2, w3[1], RIPEMD320C50, RIPEMD320S58);
+  RIPEMD320_STEP_S (RIPEMD320_J , b2, c2, d2, e2, a2, w1[2], RIPEMD320C50, RIPEMD320S59);
+  RIPEMD320_STEP_S (RIPEMD320_J , a2, b2, c2, d2, e2, w3[3], RIPEMD320C50, RIPEMD320S5A);
+  RIPEMD320_STEP_S (RIPEMD320_J , e2, a2, b2, c2, d2, w2[0], RIPEMD320C50, RIPEMD320S5B);
+  RIPEMD320_STEP_S (RIPEMD320_J , d2, e2, a2, b2, c2, w0[1], RIPEMD320C50, RIPEMD320S5C);
+  RIPEMD320_STEP_S (RIPEMD320_J , c2, d2, e2, a2, b2, w2[2], RIPEMD320C50, RIPEMD320S5D);
+  RIPEMD320_STEP_S (RIPEMD320_J , b2, c2, d2, e2, a2, w0[3], RIPEMD320C50, RIPEMD320S5E);
+  RIPEMD320_STEP_S (RIPEMD320_J , a2, b2, c2, d2, e2, w3[0], RIPEMD320C50, RIPEMD320S5F);
+
+  tmp = a1; a1 = a2; a2 = tmp;
+
+  RIPEMD320_STEP_S (RIPEMD320_Go, e1, a1, b1, c1, d1, w1[3], RIPEMD320C10, RIPEMD320S10);
+  RIPEMD320_STEP_S (RIPEMD320_Go, d1, e1, a1, b1, c1, w1[0], RIPEMD320C10, RIPEMD320S11);
+  RIPEMD320_STEP_S (RIPEMD320_Go, c1, d1, e1, a1, b1, w3[1], RIPEMD320C10, RIPEMD320S12);
+  RIPEMD320_STEP_S (RIPEMD320_Go, b1, c1, d1, e1, a1, w0[1], RIPEMD320C10, RIPEMD320S13);
+  RIPEMD320_STEP_S (RIPEMD320_Go, a1, b1, c1, d1, e1, w2[2], RIPEMD320C10, RIPEMD320S14);
+  RIPEMD320_STEP_S (RIPEMD320_Go, e1, a1, b1, c1, d1, w1[2], RIPEMD320C10, RIPEMD320S15);
+  RIPEMD320_STEP_S (RIPEMD320_Go, d1, e1, a1, b1, c1, w3[3], RIPEMD320C10, RIPEMD320S16);
+  RIPEMD320_STEP_S (RIPEMD320_Go, c1, d1, e1, a1, b1, w0[3], RIPEMD320C10, RIPEMD320S17);
+  RIPEMD320_STEP_S (RIPEMD320_Go, b1, c1, d1, e1, a1, w3[0], RIPEMD320C10, RIPEMD320S18);
+  RIPEMD320_STEP_S (RIPEMD320_Go, a1, b1, c1, d1, e1, w0[0], RIPEMD320C10, RIPEMD320S19);
+  RIPEMD320_STEP_S (RIPEMD320_Go, e1, a1, b1, c1, d1, w2[1], RIPEMD320C10, RIPEMD320S1A);
+  RIPEMD320_STEP_S (RIPEMD320_Go, d1, e1, a1, b1, c1, w1[1], RIPEMD320C10, RIPEMD320S1B);
+  RIPEMD320_STEP_S (RIPEMD320_Go, c1, d1, e1, a1, b1, w0[2], RIPEMD320C10, RIPEMD320S1C);
+  RIPEMD320_STEP_S (RIPEMD320_Go, b1, c1, d1, e1, a1, w3[2], RIPEMD320C10, RIPEMD320S1D);
+  RIPEMD320_STEP_S (RIPEMD320_Go, a1, b1, c1, d1, e1, w2[3], RIPEMD320C10, RIPEMD320S1E);
+  RIPEMD320_STEP_S (RIPEMD320_Go, e1, a1, b1, c1, d1, w2[0], RIPEMD320C10, RIPEMD320S1F);
+
+  RIPEMD320_STEP_S (RIPEMD320_Io, e2, a2, b2, c2, d2, w1[2], RIPEMD320C60, RIPEMD320S60);
+  RIPEMD320_STEP_S (RIPEMD320_Io, d2, e2, a2, b2, c2, w2[3], RIPEMD320C60, RIPEMD320S61);
+  RIPEMD320_STEP_S (RIPEMD320_Io, c2, d2, e2, a2, b2, w0[3], RIPEMD320C60, RIPEMD320S62);
+  RIPEMD320_STEP_S (RIPEMD320_Io, b2, c2, d2, e2, a2, w1[3], RIPEMD320C60, RIPEMD320S63);
+  RIPEMD320_STEP_S (RIPEMD320_Io, a2, b2, c2, d2, e2, w0[0], RIPEMD320C60, RIPEMD320S64);
+  RIPEMD320_STEP_S (RIPEMD320_Io, e2, a2, b2, c2, d2, w3[1], RIPEMD320C60, RIPEMD320S65);
+  RIPEMD320_STEP_S (RIPEMD320_Io, d2, e2, a2, b2, c2, w1[1], RIPEMD320C60, RIPEMD320S66);
+  RIPEMD320_STEP_S (RIPEMD320_Io, c2, d2, e2, a2, b2, w2[2], RIPEMD320C60, RIPEMD320S67);
+  RIPEMD320_STEP_S (RIPEMD320_Io, b2, c2, d2, e2, a2, w3[2], RIPEMD320C60, RIPEMD320S68);
+  RIPEMD320_STEP_S (RIPEMD320_Io, a2, b2, c2, d2, e2, w3[3], RIPEMD320C60, RIPEMD320S69);
+  RIPEMD320_STEP_S (RIPEMD320_Io, e2, a2, b2, c2, d2, w2[0], RIPEMD320C60, RIPEMD320S6A);
+  RIPEMD320_STEP_S (RIPEMD320_Io, d2, e2, a2, b2, c2, w3[0], RIPEMD320C60, RIPEMD320S6B);
+  RIPEMD320_STEP_S (RIPEMD320_Io, c2, d2, e2, a2, b2, w1[0], RIPEMD320C60, RIPEMD320S6C);
+  RIPEMD320_STEP_S (RIPEMD320_Io, b2, c2, d2, e2, a2, w2[1], RIPEMD320C60, RIPEMD320S6D);
+  RIPEMD320_STEP_S (RIPEMD320_Io, a2, b2, c2, d2, e2, w0[1], RIPEMD320C60, RIPEMD320S6E);
+  RIPEMD320_STEP_S (RIPEMD320_Io, e2, a2, b2, c2, d2, w0[2], RIPEMD320C60, RIPEMD320S6F);
+
+  tmp = b1; b1 = b2; b2 = tmp;
+
+  RIPEMD320_STEP_S (RIPEMD320_H , d1, e1, a1, b1, c1, w0[3], RIPEMD320C20, RIPEMD320S20);
+  RIPEMD320_STEP_S (RIPEMD320_H , c1, d1, e1, a1, b1, w2[2], RIPEMD320C20, RIPEMD320S21);
+  RIPEMD320_STEP_S (RIPEMD320_H , b1, c1, d1, e1, a1, w3[2], RIPEMD320C20, RIPEMD320S22);
+  RIPEMD320_STEP_S (RIPEMD320_H , a1, b1, c1, d1, e1, w1[0], RIPEMD320C20, RIPEMD320S23);
+  RIPEMD320_STEP_S (RIPEMD320_H , e1, a1, b1, c1, d1, w2[1], RIPEMD320C20, RIPEMD320S24);
+  RIPEMD320_STEP_S (RIPEMD320_H , d1, e1, a1, b1, c1, w3[3], RIPEMD320C20, RIPEMD320S25);
+  RIPEMD320_STEP_S (RIPEMD320_H , c1, d1, e1, a1, b1, w2[0], RIPEMD320C20, RIPEMD320S26);
+  RIPEMD320_STEP_S (RIPEMD320_H , b1, c1, d1, e1, a1, w0[1], RIPEMD320C20, RIPEMD320S27);
+  RIPEMD320_STEP_S (RIPEMD320_H , a1, b1, c1, d1, e1, w0[2], RIPEMD320C20, RIPEMD320S28);
+  RIPEMD320_STEP_S (RIPEMD320_H , e1, a1, b1, c1, d1, w1[3], RIPEMD320C20, RIPEMD320S29);
+  RIPEMD320_STEP_S (RIPEMD320_H , d1, e1, a1, b1, c1, w0[0], RIPEMD320C20, RIPEMD320S2A);
+  RIPEMD320_STEP_S (RIPEMD320_H , c1, d1, e1, a1, b1, w1[2], RIPEMD320C20, RIPEMD320S2B);
+  RIPEMD320_STEP_S (RIPEMD320_H , b1, c1, d1, e1, a1, w3[1], RIPEMD320C20, RIPEMD320S2C);
+  RIPEMD320_STEP_S (RIPEMD320_H , a1, b1, c1, d1, e1, w2[3], RIPEMD320C20, RIPEMD320S2D);
+  RIPEMD320_STEP_S (RIPEMD320_H , e1, a1, b1, c1, d1, w1[1], RIPEMD320C20, RIPEMD320S2E);
+  RIPEMD320_STEP_S (RIPEMD320_H , d1, e1, a1, b1, c1, w3[0], RIPEMD320C20, RIPEMD320S2F);
+
+  RIPEMD320_STEP_S (RIPEMD320_H , d2, e2, a2, b2, c2, w3[3], RIPEMD320C70, RIPEMD320S70);
+  RIPEMD320_STEP_S (RIPEMD320_H , c2, d2, e2, a2, b2, w1[1], RIPEMD320C70, RIPEMD320S71);
+  RIPEMD320_STEP_S (RIPEMD320_H , b2, c2, d2, e2, a2, w0[1], RIPEMD320C70, RIPEMD320S72);
+  RIPEMD320_STEP_S (RIPEMD320_H , a2, b2, c2, d2, e2, w0[3], RIPEMD320C70, RIPEMD320S73);
+  RIPEMD320_STEP_S (RIPEMD320_H , e2, a2, b2, c2, d2, w1[3], RIPEMD320C70, RIPEMD320S74);
+  RIPEMD320_STEP_S (RIPEMD320_H , d2, e2, a2, b2, c2, w3[2], RIPEMD320C70, RIPEMD320S75);
+  RIPEMD320_STEP_S (RIPEMD320_H , c2, d2, e2, a2, b2, w1[2], RIPEMD320C70, RIPEMD320S76);
+  RIPEMD320_STEP_S (RIPEMD320_H , b2, c2, d2, e2, a2, w2[1], RIPEMD320C70, RIPEMD320S77);
+  RIPEMD320_STEP_S (RIPEMD320_H , a2, b2, c2, d2, e2, w2[3], RIPEMD320C70, RIPEMD320S78);
+  RIPEMD320_STEP_S (RIPEMD320_H , e2, a2, b2, c2, d2, w2[0], RIPEMD320C70, RIPEMD320S79);
+  RIPEMD320_STEP_S (RIPEMD320_H , d2, e2, a2, b2, c2, w3[0], RIPEMD320C70, RIPEMD320S7A);
+  RIPEMD320_STEP_S (RIPEMD320_H , c2, d2, e2, a2, b2, w0[2], RIPEMD320C70, RIPEMD320S7B);
+  RIPEMD320_STEP_S (RIPEMD320_H , b2, c2, d2, e2, a2, w2[2], RIPEMD320C70, RIPEMD320S7C);
+  RIPEMD320_STEP_S (RIPEMD320_H , a2, b2, c2, d2, e2, w0[0], RIPEMD320C70, RIPEMD320S7D);
+  RIPEMD320_STEP_S (RIPEMD320_H , e2, a2, b2, c2, d2, w1[0], RIPEMD320C70, RIPEMD320S7E);
+  RIPEMD320_STEP_S (RIPEMD320_H , d2, e2, a2, b2, c2, w3[1], RIPEMD320C70, RIPEMD320S7F);
+
+  tmp = c1; c1 = c2; c2 = tmp;
+
+  RIPEMD320_STEP_S (RIPEMD320_Io, c1, d1, e1, a1, b1, w0[1], RIPEMD320C30, RIPEMD320S30);
+  RIPEMD320_STEP_S (RIPEMD320_Io, b1, c1, d1, e1, a1, w2[1], RIPEMD320C30, RIPEMD320S31);
+  RIPEMD320_STEP_S (RIPEMD320_Io, a1, b1, c1, d1, e1, w2[3], RIPEMD320C30, RIPEMD320S32);
+  RIPEMD320_STEP_S (RIPEMD320_Io, e1, a1, b1, c1, d1, w2[2], RIPEMD320C30, RIPEMD320S33);
+  RIPEMD320_STEP_S (RIPEMD320_Io, d1, e1, a1, b1, c1, w0[0], RIPEMD320C30, RIPEMD320S34);
+  RIPEMD320_STEP_S (RIPEMD320_Io, c1, d1, e1, a1, b1, w2[0], RIPEMD320C30, RIPEMD320S35);
+  RIPEMD320_STEP_S (RIPEMD320_Io, b1, c1, d1, e1, a1, w3[0], RIPEMD320C30, RIPEMD320S36);
+  RIPEMD320_STEP_S (RIPEMD320_Io, a1, b1, c1, d1, e1, w1[0], RIPEMD320C30, RIPEMD320S37);
+  RIPEMD320_STEP_S (RIPEMD320_Io, e1, a1, b1, c1, d1, w3[1], RIPEMD320C30, RIPEMD320S38);
+  RIPEMD320_STEP_S (RIPEMD320_Io, d1, e1, a1, b1, c1, w0[3], RIPEMD320C30, RIPEMD320S39);
+  RIPEMD320_STEP_S (RIPEMD320_Io, c1, d1, e1, a1, b1, w1[3], RIPEMD320C30, RIPEMD320S3A);
+  RIPEMD320_STEP_S (RIPEMD320_Io, b1, c1, d1, e1, a1, w3[3], RIPEMD320C30, RIPEMD320S3B);
+  RIPEMD320_STEP_S (RIPEMD320_Io, a1, b1, c1, d1, e1, w3[2], RIPEMD320C30, RIPEMD320S3C);
+  RIPEMD320_STEP_S (RIPEMD320_Io, e1, a1, b1, c1, d1, w1[1], RIPEMD320C30, RIPEMD320S3D);
+  RIPEMD320_STEP_S (RIPEMD320_Io, d1, e1, a1, b1, c1, w1[2], RIPEMD320C30, RIPEMD320S3E);
+  RIPEMD320_STEP_S (RIPEMD320_Io, c1, d1, e1, a1, b1, w0[2], RIPEMD320C30, RIPEMD320S3F);
+
+  RIPEMD320_STEP_S (RIPEMD320_Go, c2, d2, e2, a2, b2, w2[0], RIPEMD320C80, RIPEMD320S80);
+  RIPEMD320_STEP_S (RIPEMD320_Go, b2, c2, d2, e2, a2, w1[2], RIPEMD320C80, RIPEMD320S81);
+  RIPEMD320_STEP_S (RIPEMD320_Go, a2, b2, c2, d2, e2, w1[0], RIPEMD320C80, RIPEMD320S82);
+  RIPEMD320_STEP_S (RIPEMD320_Go, e2, a2, b2, c2, d2, w0[1], RIPEMD320C80, RIPEMD320S83);
+  RIPEMD320_STEP_S (RIPEMD320_Go, d2, e2, a2, b2, c2, w0[3], RIPEMD320C80, RIPEMD320S84);
+  RIPEMD320_STEP_S (RIPEMD320_Go, c2, d2, e2, a2, b2, w2[3], RIPEMD320C80, RIPEMD320S85);
+  RIPEMD320_STEP_S (RIPEMD320_Go, b2, c2, d2, e2, a2, w3[3], RIPEMD320C80, RIPEMD320S86);
+  RIPEMD320_STEP_S (RIPEMD320_Go, a2, b2, c2, d2, e2, w0[0], RIPEMD320C80, RIPEMD320S87);
+  RIPEMD320_STEP_S (RIPEMD320_Go, e2, a2, b2, c2, d2, w1[1], RIPEMD320C80, RIPEMD320S88);
+  RIPEMD320_STEP_S (RIPEMD320_Go, d2, e2, a2, b2, c2, w3[0], RIPEMD320C80, RIPEMD320S89);
+  RIPEMD320_STEP_S (RIPEMD320_Go, c2, d2, e2, a2, b2, w0[2], RIPEMD320C80, RIPEMD320S8A);
+  RIPEMD320_STEP_S (RIPEMD320_Go, b2, c2, d2, e2, a2, w3[1], RIPEMD320C80, RIPEMD320S8B);
+  RIPEMD320_STEP_S (RIPEMD320_Go, a2, b2, c2, d2, e2, w2[1], RIPEMD320C80, RIPEMD320S8C);
+  RIPEMD320_STEP_S (RIPEMD320_Go, e2, a2, b2, c2, d2, w1[3], RIPEMD320C80, RIPEMD320S8D);
+  RIPEMD320_STEP_S (RIPEMD320_Go, d2, e2, a2, b2, c2, w2[2], RIPEMD320C80, RIPEMD320S8E);
+  RIPEMD320_STEP_S (RIPEMD320_Go, c2, d2, e2, a2, b2, w3[2], RIPEMD320C80, RIPEMD320S8F);
+
+  tmp = d1; d1 = d2; d2 = tmp;
+
+  RIPEMD320_STEP_S (RIPEMD320_J , b1, c1, d1, e1, a1, w1[0], RIPEMD320C40, RIPEMD320S40);
+  RIPEMD320_STEP_S (RIPEMD320_J , a1, b1, c1, d1, e1, w0[0], RIPEMD320C40, RIPEMD320S41);
+  RIPEMD320_STEP_S (RIPEMD320_J , e1, a1, b1, c1, d1, w1[1], RIPEMD320C40, RIPEMD320S42);
+  RIPEMD320_STEP_S (RIPEMD320_J , d1, e1, a1, b1, c1, w2[1], RIPEMD320C40, RIPEMD320S43);
+  RIPEMD320_STEP_S (RIPEMD320_J , c1, d1, e1, a1, b1, w1[3], RIPEMD320C40, RIPEMD320S44);
+  RIPEMD320_STEP_S (RIPEMD320_J , b1, c1, d1, e1, a1, w3[0], RIPEMD320C40, RIPEMD320S45);
+  RIPEMD320_STEP_S (RIPEMD320_J , a1, b1, c1, d1, e1, w0[2], RIPEMD320C40, RIPEMD320S46);
+  RIPEMD320_STEP_S (RIPEMD320_J , e1, a1, b1, c1, d1, w2[2], RIPEMD320C40, RIPEMD320S47);
+  RIPEMD320_STEP_S (RIPEMD320_J , d1, e1, a1, b1, c1, w3[2], RIPEMD320C40, RIPEMD320S48);
+  RIPEMD320_STEP_S (RIPEMD320_J , c1, d1, e1, a1, b1, w0[1], RIPEMD320C40, RIPEMD320S49);
+  RIPEMD320_STEP_S (RIPEMD320_J , b1, c1, d1, e1, a1, w0[3], RIPEMD320C40, RIPEMD320S4A);
+  RIPEMD320_STEP_S (RIPEMD320_J , a1, b1, c1, d1, e1, w2[0], RIPEMD320C40, RIPEMD320S4B);
+  RIPEMD320_STEP_S (RIPEMD320_J , e1, a1, b1, c1, d1, w2[3], RIPEMD320C40, RIPEMD320S4C);
+  RIPEMD320_STEP_S (RIPEMD320_J , d1, e1, a1, b1, c1, w1[2], RIPEMD320C40, RIPEMD320S4D);
+  RIPEMD320_STEP_S (RIPEMD320_J , c1, d1, e1, a1, b1, w3[3], RIPEMD320C40, RIPEMD320S4E);
+  RIPEMD320_STEP_S (RIPEMD320_J , b1, c1, d1, e1, a1, w3[1], RIPEMD320C40, RIPEMD320S4F);
+
+  RIPEMD320_STEP_S (RIPEMD320_F , b2, c2, d2, e2, a2, w3[0], RIPEMD320C90, RIPEMD320S90);
+  RIPEMD320_STEP_S (RIPEMD320_F , a2, b2, c2, d2, e2, w3[3], RIPEMD320C90, RIPEMD320S91);
+  RIPEMD320_STEP_S (RIPEMD320_F , e2, a2, b2, c2, d2, w2[2], RIPEMD320C90, RIPEMD320S92);
+  RIPEMD320_STEP_S (RIPEMD320_F , d2, e2, a2, b2, c2, w1[0], RIPEMD320C90, RIPEMD320S93);
+  RIPEMD320_STEP_S (RIPEMD320_F , c2, d2, e2, a2, b2, w0[1], RIPEMD320C90, RIPEMD320S94);
+  RIPEMD320_STEP_S (RIPEMD320_F , b2, c2, d2, e2, a2, w1[1], RIPEMD320C90, RIPEMD320S95);
+  RIPEMD320_STEP_S (RIPEMD320_F , a2, b2, c2, d2, e2, w2[0], RIPEMD320C90, RIPEMD320S96);
+  RIPEMD320_STEP_S (RIPEMD320_F , e2, a2, b2, c2, d2, w1[3], RIPEMD320C90, RIPEMD320S97);
+  RIPEMD320_STEP_S (RIPEMD320_F , d2, e2, a2, b2, c2, w1[2], RIPEMD320C90, RIPEMD320S98);
+  RIPEMD320_STEP_S (RIPEMD320_F , c2, d2, e2, a2, b2, w0[2], RIPEMD320C90, RIPEMD320S99);
+  RIPEMD320_STEP_S (RIPEMD320_F , b2, c2, d2, e2, a2, w3[1], RIPEMD320C90, RIPEMD320S9A);
+  RIPEMD320_STEP_S (RIPEMD320_F , a2, b2, c2, d2, e2, w3[2], RIPEMD320C90, RIPEMD320S9B);
+  RIPEMD320_STEP_S (RIPEMD320_F , e2, a2, b2, c2, d2, w0[0], RIPEMD320C90, RIPEMD320S9C);
+  RIPEMD320_STEP_S (RIPEMD320_F , d2, e2, a2, b2, c2, w0[3], RIPEMD320C90, RIPEMD320S9D);
+  RIPEMD320_STEP_S (RIPEMD320_F , c2, d2, e2, a2, b2, w2[1], RIPEMD320C90, RIPEMD320S9E);
+  RIPEMD320_STEP_S (RIPEMD320_F , b2, c2, d2, e2, a2, w2[3], RIPEMD320C90, RIPEMD320S9F);
+
+  tmp = e1; e1 = e2; e2 = tmp;
+
+  const u32 a = digest[0] + a1;
+  const u32 b = digest[1] + b1;
+  const u32 c = digest[2] + c1;
+  const u32 d = digest[3] + d1;
+  const u32 e = digest[4] + e1;
+  const u32 f = digest[5] + a2;
+  const u32 g = digest[6] + b2;
+  const u32 h = digest[7] + c2;
+  const u32 i = digest[8] + d2;
+  const u32 l = digest[9] + e2;
+
+  digest[0] = a;
+  digest[1] = b;
+  digest[2] = c;
+  digest[3] = d;
+  digest[4] = e;
+  digest[5] = f;
+  digest[6] = g;
+  digest[7] = h;
+  digest[8] = i;
+  digest[9] = l;
+}
+
+DECLSPEC void ripemd320_init (PRIVATE_AS ripemd320_ctx_t *ctx)
+{ // Reset ctx for a new hash: load the ten RIPEMD320M_* initial chaining words, clear the block buffer and the byte count.
+  ctx->h[0] = RIPEMD320M_A;
+  ctx->h[1] = RIPEMD320M_B;
+  ctx->h[2] = RIPEMD320M_C;
+  ctx->h[3] = RIPEMD320M_D;
+  ctx->h[4] = RIPEMD320M_E;
+  ctx->h[5] = RIPEMD320M_F;
+  ctx->h[6] = RIPEMD320M_G;
+  ctx->h[7] = RIPEMD320M_H;
+  ctx->h[8] = RIPEMD320M_I;
+  ctx->h[9] = RIPEMD320M_L;
+  // w0..w3 together form the 16-word (64-byte) pending block buffer
+  ctx->w0[0] = 0;
+  ctx->w0[1] = 0;
+  ctx->w0[2] = 0;
+  ctx->w0[3] = 0;
+  ctx->w1[0] = 0;
+  ctx->w1[1] = 0;
+  ctx->w1[2] = 0;
+  ctx->w1[3] = 0;
+  ctx->w2[0] = 0;
+  ctx->w2[1] = 0;
+  ctx->w2[2] = 0;
+  ctx->w2[3] = 0;
+  ctx->w3[0] = 0;
+  ctx->w3[1] = 0;
+  ctx->w3[2] = 0;
+  ctx->w3[3] = 0;
+  // number of bytes absorbed so far
+  ctx->len = 0;
+}
+
+DECLSPEC void ripemd320_update_64 (PRIVATE_AS ripemd320_ctx_t *ctx, PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const int len)
+{ // Absorb len bytes held in the 16 words w0..w3 into ctx. At most one transform runs per call, so this is meant for len <= 64. w0..w3 are handed non-const to the offset helpers below, so treat them as clobbered.
+  if (len == 0) return;
+  // pos: byte offset inside the pending 64-byte block before this call
+  const int pos = ctx->len & 63;
+
+  ctx->len += len;
+
+  if (pos == 0)
+  {
+    ctx->w0[0] = w0[0]; // buffer is empty: take all 16 input words as-is, whatever len is
+    ctx->w0[1] = w0[1]; // NOTE(review): words past len are copied too - presumably callers keep them zero, confirm
+    ctx->w0[2] = w0[2];
+    ctx->w0[3] = w0[3];
+    ctx->w1[0] = w1[0];
+    ctx->w1[1] = w1[1];
+    ctx->w1[2] = w1[2];
+    ctx->w1[3] = w1[3];
+    ctx->w2[0] = w2[0];
+    ctx->w2[1] = w2[1];
+    ctx->w2[2] = w2[2];
+    ctx->w2[3] = w2[3];
+    ctx->w3[0] = w3[0];
+    ctx->w3[1] = w3[1];
+    ctx->w3[2] = w3[2];
+    ctx->w3[3] = w3[3];
+    // a full block arrived: compress it right away and start over with an empty buffer
+    if (len == 64)
+    {
+      ripemd320_transform (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h);
+
+      ctx->w0[0] = 0;
+      ctx->w0[1] = 0;
+      ctx->w0[2] = 0;
+      ctx->w0[3] = 0;
+      ctx->w1[0] = 0;
+      ctx->w1[1] = 0;
+      ctx->w1[2] = 0;
+      ctx->w1[3] = 0;
+      ctx->w2[0] = 0;
+      ctx->w2[1] = 0;
+      ctx->w2[2] = 0;
+      ctx->w2[3] = 0;
+      ctx->w3[0] = 0;
+      ctx->w3[1] = 0;
+      ctx->w3[2] = 0;
+      ctx->w3[3] = 0;
+    }
+  }
+  else
+  {
+    if ((pos + len) < 64) // new data still fits into the partially filled block
+    {
+      switch_buffer_by_offset_le_S (w0, w1, w2, w3, pos); // helper defined elsewhere; presumably shifts the input up by pos bytes in place
+      // merge the shifted input into the buffered bytes
+      ctx->w0[0] |= w0[0];
+      ctx->w0[1] |= w0[1];
+      ctx->w0[2] |= w0[2];
+      ctx->w0[3] |= w0[3];
+      ctx->w1[0] |= w1[0];
+      ctx->w1[1] |= w1[1];
+      ctx->w1[2] |= w1[2];
+      ctx->w1[3] |= w1[3];
+      ctx->w2[0] |= w2[0];
+      ctx->w2[1] |= w2[1];
+      ctx->w2[2] |= w2[2];
+      ctx->w2[3] |= w2[3];
+      ctx->w3[0] |= w3[0];
+      ctx->w3[1] |= w3[1];
+      ctx->w3[2] |= w3[2];
+      ctx->w3[3] |= w3[3];
+    }
+    else
+    {
+      u32 c0[4] = { 0 };
+      u32 c1[4] = { 0 };
+      u32 c2[4] = { 0 };
+      u32 c3[4] = { 0 };
+      // block gets completed: c0..c3 receive what does not fit (helper defined elsewhere; presumably the bytes shifted past offset 64)
+      switch_buffer_by_offset_carry_le_S (w0, w1, w2, w3, c0, c1, c2, c3, pos);
+
+      ctx->w0[0] |= w0[0];
+      ctx->w0[1] |= w0[1];
+      ctx->w0[2] |= w0[2];
+      ctx->w0[3] |= w0[3];
+      ctx->w1[0] |= w1[0];
+      ctx->w1[1] |= w1[1];
+      ctx->w1[2] |= w1[2];
+      ctx->w1[3] |= w1[3];
+      ctx->w2[0] |= w2[0];
+      ctx->w2[1] |= w2[1];
+      ctx->w2[2] |= w2[2];
+      ctx->w2[3] |= w2[3];
+      ctx->w3[0] |= w3[0];
+      ctx->w3[1] |= w3[1];
+      ctx->w3[2] |= w3[2];
+      ctx->w3[3] |= w3[3];
+
+      ripemd320_transform (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h);
+      // the carry becomes the start of the next pending block
+      ctx->w0[0] = c0[0];
+      ctx->w0[1] = c0[1];
+      ctx->w0[2] = c0[2];
+      ctx->w0[3] = c0[3];
+      ctx->w1[0] = c1[0];
+      ctx->w1[1] = c1[1];
+      ctx->w1[2] = c1[2];
+      ctx->w1[3] = c1[3];
+      ctx->w2[0] = c2[0];
+      ctx->w2[1] = c2[1];
+      ctx->w2[2] = c2[2];
+      ctx->w2[3] = c2[3];
+      ctx->w3[0] = c3[0];
+      ctx->w3[1] = c3[1];
+      ctx->w3[2] = c3[2];
+      ctx->w3[3] = c3[3];
+    }
+  }
+}
+
+DECLSPEC void ripemd320_update (PRIVATE_AS ripemd320_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len)
+{ // Absorb len bytes from the word buffer w into ctx, 64 bytes (16 words) at a time, via ripemd320_update_64.
+  u32 w0[4];
+  u32 w1[4];
+  u32 w2[4];
+  u32 w3[4];
+  // pos1 counts bytes consumed, pos4 the matching word index into w
+  int pos1;
+  int pos4;
+  // full chunks only; the loop stops while 1..64 bytes remain (or len <= 64), leaving the tail for the code below
+  for (pos1 = 0, pos4 = 0; pos1 < len - 64; pos1 += 64, pos4 += 16)
+  {
+    w0[0] = w[pos4 +  0];
+    w0[1] = w[pos4 +  1];
+    w0[2] = w[pos4 +  2];
+    w0[3] = w[pos4 +  3];
+    w1[0] = w[pos4 +  4];
+    w1[1] = w[pos4 +  5];
+    w1[2] = w[pos4 +  6];
+    w1[3] = w[pos4 +  7];
+    w2[0] = w[pos4 +  8];
+    w2[1] = w[pos4 +  9];
+    w2[2] = w[pos4 + 10];
+    w2[3] = w[pos4 + 11];
+    w3[0] = w[pos4 + 12];
+    w3[1] = w[pos4 + 13];
+    w3[2] = w[pos4 + 14];
+    w3[3] = w[pos4 + 15];
+
+    ripemd320_update_64 (ctx, w0, w1, w2, w3, 64);
+  }
+  // tail: always reads 16 words, even if fewer bytes remain - NOTE(review): presumably w is readable and zero-padded that far, confirm
+  w0[0] = w[pos4 +  0];
+  w0[1] = w[pos4 +  1];
+  w0[2] = w[pos4 +  2];
+  w0[3] = w[pos4 +  3];
+  w1[0] = w[pos4 +  4];
+  w1[1] = w[pos4 +  5];
+  w1[2] = w[pos4 +  6];
+  w1[3] = w[pos4 +  7];
+  w2[0] = w[pos4 +  8];
+  w2[1] = w[pos4 +  9];
+  w2[2] = w[pos4 + 10];
+  w2[3] = w[pos4 + 11];
+  w3[0] = w[pos4 + 12];
+  w3[1] = w[pos4 + 13];
+  w3[2] = w[pos4 + 14];
+  w3[3] = w[pos4 + 15];
+
+  ripemd320_update_64 (ctx, w0, w1, w2, w3, len - pos1);
+}
+
+DECLSPEC void ripemd320_update_swap (PRIVATE_AS ripemd320_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len)
+{ // Same chunking as ripemd320_update, but every input word goes through hc_swap32_S before it is absorbed.
+  u32 w0[4];
+  u32 w1[4];
+  u32 w2[4];
+  u32 w3[4];
+  // pos1 counts bytes consumed, pos4 the matching word index into w
+  int pos1;
+  int pos4;
+  // full chunks only; the last 1..64 bytes are handled after the loop
+  for (pos1 = 0, pos4 = 0; pos1 < len - 64; pos1 += 64, pos4 += 16)
+  {
+    w0[0] = w[pos4 +  0];
+    w0[1] = w[pos4 +  1];
+    w0[2] = w[pos4 +  2];
+    w0[3] = w[pos4 +  3];
+    w1[0] = w[pos4 +  4];
+    w1[1] = w[pos4 +  5];
+    w1[2] = w[pos4 +  6];
+    w1[3] = w[pos4 +  7];
+    w2[0] = w[pos4 +  8];
+    w2[1] = w[pos4 +  9];
+    w2[2] = w[pos4 + 10];
+    w2[3] = w[pos4 + 11];
+    w3[0] = w[pos4 + 12];
+    w3[1] = w[pos4 + 13];
+    w3[2] = w[pos4 + 14];
+    w3[3] = w[pos4 + 15];
+    // hc_swap32_S is defined elsewhere; presumably reverses the byte order of each 32-bit word
+    w0[0] = hc_swap32_S (w0[0]);
+    w0[1] = hc_swap32_S (w0[1]);
+    w0[2] = hc_swap32_S (w0[2]);
+    w0[3] = hc_swap32_S (w0[3]);
+    w1[0] = hc_swap32_S (w1[0]);
+    w1[1] = hc_swap32_S (w1[1]);
+    w1[2] = hc_swap32_S (w1[2]);
+    w1[3] = hc_swap32_S (w1[3]);
+    w2[0] = hc_swap32_S (w2[0]);
+    w2[1] = hc_swap32_S (w2[1]);
+    w2[2] = hc_swap32_S (w2[2]);
+    w2[3] = hc_swap32_S (w2[3]);
+    w3[0] = hc_swap32_S (w3[0]);
+    w3[1] = hc_swap32_S (w3[1]);
+    w3[2] = hc_swap32_S (w3[2]);
+    w3[3] = hc_swap32_S (w3[3]);
+
+    ripemd320_update_64 (ctx, w0, w1, w2, w3, 64);
+  }
+  // tail: always reads 16 words, even if fewer bytes remain - NOTE(review): presumably w is readable and zero-padded that far, confirm
+  w0[0] = w[pos4 +  0];
+  w0[1] = w[pos4 +  1];
+  w0[2] = w[pos4 +  2];
+  w0[3] = w[pos4 +  3];
+  w1[0] = w[pos4 +  4];
+  w1[1] = w[pos4 +  5];
+  w1[2] = w[pos4 +  6];
+  w1[3] = w[pos4 +  7];
+  w2[0] = w[pos4 +  8];
+  w2[1] = w[pos4 +  9];
+  w2[2] = w[pos4 + 10];
+  w2[3] = w[pos4 + 11];
+  w3[0] = w[pos4 + 12];
+  w3[1] = w[pos4 + 13];
+  w3[2] = w[pos4 + 14];
+  w3[3] = w[pos4 + 15];
+
+  w0[0] = hc_swap32_S (w0[0]);
+  w0[1] = hc_swap32_S (w0[1]);
+  w0[2] = hc_swap32_S (w0[2]);
+  w0[3] = hc_swap32_S (w0[3]);
+  w1[0] = hc_swap32_S (w1[0]);
+  w1[1] = hc_swap32_S (w1[1]);
+  w1[2] = hc_swap32_S (w1[2]);
+  w1[3] = hc_swap32_S (w1[3]);
+  w2[0] = hc_swap32_S (w2[0]);
+  w2[1] = hc_swap32_S (w2[1]);
+  w2[2] = hc_swap32_S (w2[2]);
+  w2[3] = hc_swap32_S (w2[3]);
+  w3[0] = hc_swap32_S (w3[0]);
+  w3[1] = hc_swap32_S (w3[1]);
+  w3[2] = hc_swap32_S (w3[2]);
+  w3[3] = hc_swap32_S (w3[3]);
+
+  ripemd320_update_64 (ctx, w0, w1, w2, w3, len - pos1);
+}
+
+DECLSPEC void ripemd320_update_utf16le (PRIVATE_AS ripemd320_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len)
+{ // Absorb len input bytes from w after widening them to UTF-16LE; twice as many bytes reach the hash on the fast path.
+  if (hc_enc_scan (w, len)) // helper defined elsewhere; presumably true when the input needs a real encoding conversion - confirm
+  {
+    hc_enc_t hc_enc;
+
+    hc_enc_init (&hc_enc);
+    // slow path: convert piecewise, each piece is at most one 64-byte block (sizeof (enc_buf))
+    while (hc_enc_has_next (&hc_enc, len))
+    {
+      u32 enc_buf[16] = { 0 };
+
+      const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf));
+      // conversion failed: len is set to -1 and processing stops (presumably an error marker for callers - confirm)
+      if (enc_len == -1)
+      {
+        ctx->len = -1;
+
+        return;
+      }
+      // enc_buf + 0/4/8/12 are the four 4-word quarters expected as w0..w3
+      ripemd320_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len);
+    }
+
+    return;
+  }
+  // fast path: 32 input bytes (8 words) expand to one 64-byte block
+  u32 w0[4];
+  u32 w1[4];
+  u32 w2[4];
+  u32 w3[4];
+
+  int pos1;
+  int pos4;
+  // full 32-byte input chunks only; the last 1..32 bytes are handled after the loop
+  for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8)
+  {
+    w0[0] = w[pos4 + 0];
+    w0[1] = w[pos4 + 1];
+    w0[2] = w[pos4 + 2];
+    w0[3] = w[pos4 + 3];
+    w1[0] = w[pos4 + 4];
+    w1[1] = w[pos4 + 5];
+    w1[2] = w[pos4 + 6];
+    w1[3] = w[pos4 + 7];
+    // order matters: w1 is expanded into w2/w3 first, because the second call takes w1 as its third argument (assuming args 2 and 3 are outputs)
+    make_utf16le_S (w1, w2, w3);
+    make_utf16le_S (w0, w0, w1);
+
+    ripemd320_update_64 (ctx, w0, w1, w2, w3, 32 * 2);
+  }
+  // tail: always reads 8 words, even if fewer bytes remain - NOTE(review): presumably w is readable and zero-padded that far, confirm
+  w0[0] = w[pos4 + 0];
+  w0[1] = w[pos4 + 1];
+  w0[2] = w[pos4 + 2];
+  w0[3] = w[pos4 + 3];
+  w1[0] = w[pos4 + 4];
+  w1[1] = w[pos4 + 5];
+  w1[2] = w[pos4 + 6];
+  w1[3] = w[pos4 + 7];
+
+  make_utf16le_S (w1, w2, w3);
+  make_utf16le_S (w0, w0, w1);
+  // remaining input bytes, doubled by the widening
+  ripemd320_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2);
+}
+
+DECLSPEC void ripemd320_update_utf16le_swap (PRIVATE_AS ripemd320_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len)
+{ // Like ripemd320_update_utf16le, but every expanded word goes through hc_swap32_S before it is absorbed.
+  if (hc_enc_scan (w, len)) // helper defined elsewhere; presumably true when the input needs a real encoding conversion - confirm
+  {
+    hc_enc_t hc_enc;
+
+    hc_enc_init (&hc_enc);
+    // slow path: convert piecewise, each piece is at most one 64-byte block (sizeof (enc_buf))
+    while (hc_enc_has_next (&hc_enc, len))
+    {
+      u32 enc_buf[16] = { 0 };
+
+      const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf));
+      // conversion failed: len is set to -1 and processing stops (presumably an error marker for callers - confirm)
+      if (enc_len == -1)
+      {
+        ctx->len = -1;
+
+        return;
+      }
+      // swap all 16 words of the converted block, whatever enc_len is
+      enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]);
+      enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]);
+      enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]);
+      enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]);
+      enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]);
+      enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]);
+      enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]);
+      enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]);
+      enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]);
+      enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]);
+      enc_buf[10] = hc_swap32_S (enc_buf[10]);
+      enc_buf[11] = hc_swap32_S (enc_buf[11]);
+      enc_buf[12] = hc_swap32_S (enc_buf[12]);
+      enc_buf[13] = hc_swap32_S (enc_buf[13]);
+      enc_buf[14] = hc_swap32_S (enc_buf[14]);
+      enc_buf[15] = hc_swap32_S (enc_buf[15]);
+
+      ripemd320_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len);
+    }
+
+    return;
+  }
+  // fast path: 32 input bytes (8 words) expand to one 64-byte block
+  u32 w0[4];
+  u32 w1[4];
+  u32 w2[4];
+  u32 w3[4];
+
+  int pos1;
+  int pos4;
+  // full 32-byte input chunks only; the last 1..32 bytes are handled after the loop
+  for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8)
+  {
+    w0[0] = w[pos4 + 0];
+    w0[1] = w[pos4 + 1];
+    w0[2] = w[pos4 + 2];
+    w0[3] = w[pos4 + 3];
+    w1[0] = w[pos4 + 4];
+    w1[1] = w[pos4 + 5];
+    w1[2] = w[pos4 + 6];
+    w1[3] = w[pos4 + 7];
+    // order matters: w1 is expanded into w2/w3 first, because the second call takes w1 as its third argument (assuming args 2 and 3 are outputs)
+    make_utf16le_S (w1, w2, w3);
+    make_utf16le_S (w0, w0, w1);
+    // the swap is applied after the widening, on the full 16-word block
+    w0[0] = hc_swap32_S (w0[0]);
+    w0[1] = hc_swap32_S (w0[1]);
+    w0[2] = hc_swap32_S (w0[2]);
+    w0[3] = hc_swap32_S (w0[3]);
+    w1[0] = hc_swap32_S (w1[0]);
+    w1[1] = hc_swap32_S (w1[1]);
+    w1[2] = hc_swap32_S (w1[2]);
+    w1[3] = hc_swap32_S (w1[3]);
+    w2[0] = hc_swap32_S (w2[0]);
+    w2[1] = hc_swap32_S (w2[1]);
+    w2[2] = hc_swap32_S (w2[2]);
+    w2[3] = hc_swap32_S (w2[3]);
+    w3[0] = hc_swap32_S (w3[0]);
+    w3[1] = hc_swap32_S (w3[1]);
+    w3[2] = hc_swap32_S (w3[2]);
+    w3[3] = hc_swap32_S (w3[3]);
+
+    ripemd320_update_64 (ctx, w0, w1, w2, w3, 32 * 2);
+  }
+  // tail: always reads 8 words, even if fewer bytes remain - NOTE(review): presumably w is readable and zero-padded that far, confirm
+  w0[0] = w[pos4 + 0];
+  w0[1] = w[pos4 + 1];
+  w0[2] = w[pos4 + 2];
+  w0[3] = w[pos4 + 3];
+  w1[0] = w[pos4 + 4];
+  w1[1] = w[pos4 + 5];
+  w1[2] = w[pos4 + 6];
+  w1[3] = w[pos4 + 7];
+
+  make_utf16le_S (w1, w2, w3);
+  make_utf16le_S (w0, w0, w1);
+
+  w0[0] = hc_swap32_S (w0[0]);
+  w0[1] = hc_swap32_S (w0[1]);
+  w0[2] = hc_swap32_S (w0[2]);
+  w0[3] = hc_swap32_S (w0[3]);
+  w1[0] = hc_swap32_S (w1[0]);
+  w1[1] = hc_swap32_S (w1[1]);
+  w1[2] = hc_swap32_S (w1[2]);
+  w1[3] = hc_swap32_S (w1[3]);
+  w2[0] = hc_swap32_S (w2[0]);
+  w2[1] = hc_swap32_S (w2[1]);
+  w2[2] = hc_swap32_S (w2[2]);
+  w2[3] = hc_swap32_S (w2[3]);
+  w3[0] = hc_swap32_S (w3[0]);
+  w3[1] = hc_swap32_S (w3[1]);
+  w3[2] = hc_swap32_S (w3[2]);
+  w3[3] = hc_swap32_S (w3[3]);
+  // remaining input bytes, doubled by the widening
+  ripemd320_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2);
+}
+
+DECLSPEC void ripemd320_update_global (PRIVATE_AS ripemd320_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len)
+{ // Same as ripemd320_update, except that w is a GLOBAL_AS pointer; each chunk is first copied into local w0..w3.
+  u32 w0[4];
+  u32 w1[4];
+  u32 w2[4];
+  u32 w3[4];
+  // pos1 counts bytes consumed, pos4 the matching word index into w
+  int pos1;
+  int pos4;
+  // full chunks only; the last 1..64 bytes are handled after the loop
+  for (pos1 = 0, pos4 = 0; pos1 < len - 64; pos1 += 64, pos4 += 16)
+  {
+    w0[0] = w[pos4 +  0];
+    w0[1] = w[pos4 +  1];
+    w0[2] = w[pos4 +  2];
+    w0[3] = w[pos4 +  3];
+    w1[0] = w[pos4 +  4];
+    w1[1] = w[pos4 +  5];
+    w1[2] = w[pos4 +  6];
+    w1[3] = w[pos4 +  7];
+    w2[0] = w[pos4 +  8];
+    w2[1] = w[pos4 +  9];
+    w2[2] = w[pos4 + 10];
+    w2[3] = w[pos4 + 11];
+    w3[0] = w[pos4 + 12];
+    w3[1] = w[pos4 + 13];
+    w3[2] = w[pos4 + 14];
+    w3[3] = w[pos4 + 15];
+
+    ripemd320_update_64 (ctx, w0, w1, w2, w3, 64);
+  }
+  // tail: always reads 16 words, even if fewer bytes remain - NOTE(review): presumably w is readable and zero-padded that far, confirm
+  w0[0] = w[pos4 +  0];
+  w0[1] = w[pos4 +  1];
+  w0[2] = w[pos4 +  2];
+  w0[3] = w[pos4 +  3];
+  w1[0] = w[pos4 +  4];
+  w1[1] = w[pos4 +  5];
+  w1[2] = w[pos4 +  6];
+  w1[3] = w[pos4 +  7];
+  w2[0] = w[pos4 +  8];
+  w2[1] = w[pos4 +  9];
+  w2[2] = w[pos4 + 10];
+  w2[3] = w[pos4 + 11];
+  w3[0] = w[pos4 + 12];
+  w3[1] = w[pos4 + 13];
+  w3[2] = w[pos4 + 14];
+  w3[3] = w[pos4 + 15];
+
+  ripemd320_update_64 (ctx, w0, w1, w2, w3, len - pos1);
+}
+
+DECLSPEC void ripemd320_update_global_swap (PRIVATE_AS ripemd320_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len)
+{ // Same as ripemd320_update_swap, except that w is a GLOBAL_AS pointer; each chunk is copied into local w0..w3 and then swapped.
+  u32 w0[4];
+  u32 w1[4];
+  u32 w2[4];
+  u32 w3[4];
+  // pos1 counts bytes consumed, pos4 the matching word index into w
+  int pos1;
+  int pos4;
+  // full chunks only; the last 1..64 bytes are handled after the loop
+  for (pos1 = 0, pos4 = 0; pos1 < len - 64; pos1 += 64, pos4 += 16)
+  {
+    w0[0] = w[pos4 +  0];
+    w0[1] = w[pos4 +  1];
+    w0[2] = w[pos4 +  2];
+    w0[3] = w[pos4 +  3];
+    w1[0] = w[pos4 +  4];
+    w1[1] = w[pos4 +  5];
+    w1[2] = w[pos4 +  6];
+    w1[3] = w[pos4 +  7];
+    w2[0] = w[pos4 +  8];
+    w2[1] = w[pos4 +  9];
+    w2[2] = w[pos4 + 10];
+    w2[3] = w[pos4 + 11];
+    w3[0] = w[pos4 + 12];
+    w3[1] = w[pos4 + 13];
+    w3[2] = w[pos4 + 14];
+    w3[3] = w[pos4 + 15];
+    // hc_swap32_S is defined elsewhere; presumably reverses the byte order of each 32-bit word
+    w0[0] = hc_swap32_S (w0[0]);
+    w0[1] = hc_swap32_S (w0[1]);
+    w0[2] = hc_swap32_S (w0[2]);
+    w0[3] = hc_swap32_S (w0[3]);
+    w1[0] = hc_swap32_S (w1[0]);
+    w1[1] = hc_swap32_S (w1[1]);
+    w1[2] = hc_swap32_S (w1[2]);
+    w1[3] = hc_swap32_S (w1[3]);
+    w2[0] = hc_swap32_S (w2[0]);
+    w2[1] = hc_swap32_S (w2[1]);
+    w2[2] = hc_swap32_S (w2[2]);
+    w2[3] = hc_swap32_S (w2[3]);
+    w3[0] = hc_swap32_S (w3[0]);
+    w3[1] = hc_swap32_S (w3[1]);
+    w3[2] = hc_swap32_S (w3[2]);
+    w3[3] = hc_swap32_S (w3[3]);
+
+    ripemd320_update_64 (ctx, w0, w1, w2, w3, 64);
+  }
+  // tail: always reads 16 words, even if fewer bytes remain - NOTE(review): presumably w is readable and zero-padded that far, confirm
+  w0[0] = w[pos4 +  0];
+  w0[1] = w[pos4 +  1];
+  w0[2] = w[pos4 +  2];
+  w0[3] = w[pos4 +  3];
+  w1[0] = w[pos4 +  4];
+  w1[1] = w[pos4 +  5];
+  w1[2] = w[pos4 +  6];
+  w1[3] = w[pos4 +  7];
+  w2[0] = w[pos4 +  8];
+  w2[1] = w[pos4 +  9];
+  w2[2] = w[pos4 + 10];
+  w2[3] = w[pos4 + 11];
+  w3[0] = w[pos4 + 12];
+  w3[1] = w[pos4 + 13];
+  w3[2] = w[pos4 + 14];
+  w3[3] = w[pos4 + 15];
+
+  w0[0] = hc_swap32_S (w0[0]);
+  w0[1] = hc_swap32_S (w0[1]);
+  w0[2] = hc_swap32_S (w0[2]);
+  w0[3] = hc_swap32_S (w0[3]);
+  w1[0] = hc_swap32_S (w1[0]);
+  w1[1] = hc_swap32_S (w1[1]);
+  w1[2] = hc_swap32_S (w1[2]);
+  w1[3] = hc_swap32_S (w1[3]);
+  w2[0] = hc_swap32_S (w2[0]);
+  w2[1] = hc_swap32_S (w2[1]);
+  w2[2] = hc_swap32_S (w2[2]);
+  w2[3] = hc_swap32_S (w2[3]);
+  w3[0] = hc_swap32_S (w3[0]);
+  w3[1] = hc_swap32_S (w3[1]);
+  w3[2] = hc_swap32_S (w3[2]);
+  w3[3] = hc_swap32_S (w3[3]);
+
+  ripemd320_update_64 (ctx, w0, w1, w2, w3, len - pos1);
+}
+
+DECLSPEC void ripemd320_update_global_utf16le (PRIVATE_AS ripemd320_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len)
+{ // Same as ripemd320_update_utf16le, except that w is a GLOBAL_AS pointer and the *_global encoder helpers are used.
+  if (hc_enc_scan_global (w, len)) // helper defined elsewhere; presumably true when the input needs a real encoding conversion - confirm
+  {
+    hc_enc_t hc_enc;
+
+    hc_enc_init (&hc_enc);
+    // slow path: convert piecewise, each piece is at most one 64-byte block (sizeof (enc_buf))
+    while (hc_enc_has_next (&hc_enc, len))
+    {
+      u32 enc_buf[16] = { 0 };
+
+      const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf));
+      // conversion failed: len is set to -1 and processing stops (presumably an error marker for callers - confirm)
+      if (enc_len == -1)
+      {
+        ctx->len = -1;
+
+        return;
+      }
+      // enc_buf + 0/4/8/12 are the four 4-word quarters expected as w0..w3
+      ripemd320_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len);
+    }
+
+    return;
+  }
+  // fast path: 32 input bytes (8 words) expand to one 64-byte block
+  u32 w0[4];
+  u32 w1[4];
+  u32 w2[4];
+  u32 w3[4];
+
+  int pos1;
+  int pos4;
+  // full 32-byte input chunks only; the last 1..32 bytes are handled after the loop
+  for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8)
+  {
+    w0[0] = w[pos4 + 0];
+    w0[1] = w[pos4 + 1];
+    w0[2] = w[pos4 + 2];
+    w0[3] = w[pos4 + 3];
+    w1[0] = w[pos4 + 4];
+    w1[1] = w[pos4 + 5];
+    w1[2] = w[pos4 + 6];
+    w1[3] = w[pos4 + 7];
+    // order matters: w1 is expanded into w2/w3 first, because the second call takes w1 as its third argument (assuming args 2 and 3 are outputs)
+    make_utf16le_S (w1, w2, w3);
+    make_utf16le_S (w0, w0, w1);
+
+    ripemd320_update_64 (ctx, w0, w1, w2, w3, 32 * 2);
+  }
+  // tail: always reads 8 words, even if fewer bytes remain - NOTE(review): presumably w is readable and zero-padded that far, confirm
+  w0[0] = w[pos4 + 0];
+  w0[1] = w[pos4 + 1];
+  w0[2] = w[pos4 + 2];
+  w0[3] = w[pos4 + 3];
+  w1[0] = w[pos4 + 4];
+  w1[1] = w[pos4 + 5];
+  w1[2] = w[pos4 + 6];
+  w1[3] = w[pos4 + 7];
+
+  make_utf16le_S (w1, w2, w3);
+  make_utf16le_S (w0, w0, w1);
+  // remaining input bytes, doubled by the widening
+  ripemd320_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2);
+}
+
+DECLSPEC void ripemd320_update_global_utf16le_swap (PRIVATE_AS ripemd320_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len)
+{ // Same as ripemd320_update_utf16le_swap, except that w is a GLOBAL_AS pointer and the *_global encoder helpers are used.
+  if (hc_enc_scan_global (w, len)) // helper defined elsewhere; presumably true when the input needs a real encoding conversion - confirm
+  {
+    hc_enc_t hc_enc;
+
+    hc_enc_init (&hc_enc);
+    // slow path: convert piecewise, each piece is at most one 64-byte block (sizeof (enc_buf))
+    while (hc_enc_has_next (&hc_enc, len))
+    {
+      u32 enc_buf[16] = { 0 };
+
+      const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf));
+      // conversion failed: len is set to -1 and processing stops (presumably an error marker for callers - confirm)
+      if (enc_len == -1)
+      {
+        ctx->len = -1;
+
+        return;
+      }
+      // swap all 16 words of the converted block, whatever enc_len is
+      enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]);
+      enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]);
+      enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]);
+      enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]);
+      enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]);
+      enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]);
+      enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]);
+      enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]);
+      enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]);
+      enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]);
+      enc_buf[10] = hc_swap32_S (enc_buf[10]);
+      enc_buf[11] = hc_swap32_S (enc_buf[11]);
+      enc_buf[12] = hc_swap32_S (enc_buf[12]);
+      enc_buf[13] = hc_swap32_S (enc_buf[13]);
+      enc_buf[14] = hc_swap32_S (enc_buf[14]);
+      enc_buf[15] = hc_swap32_S (enc_buf[15]);
+
+      ripemd320_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len);
+    }
+
+    return;
+  }
+  // fast path: 32 input bytes (8 words) expand to one 64-byte block
+  u32 w0[4];
+  u32 w1[4];
+  u32 w2[4];
+  u32 w3[4];
+
+  int pos1;
+  int pos4;
+  // full 32-byte input chunks only; the last 1..32 bytes are handled after the loop
+  for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8)
+  {
+    w0[0] = w[pos4 + 0];
+    w0[1] = w[pos4 + 1];
+    w0[2] = w[pos4 + 2];
+    w0[3] = w[pos4 + 3];
+    w1[0] = w[pos4 + 4];
+    w1[1] = w[pos4 + 5];
+    w1[2] = w[pos4 + 6];
+    w1[3] = w[pos4 + 7];
+    // order matters: w1 is expanded into w2/w3 first, because the second call takes w1 as its third argument (assuming args 2 and 3 are outputs)
+    make_utf16le_S (w1, w2, w3);
+    make_utf16le_S (w0, w0, w1);
+    // the swap is applied after the widening, on the full 16-word block
+    w0[0] = hc_swap32_S (w0[0]);
+    w0[1] = hc_swap32_S (w0[1]);
+    w0[2] = hc_swap32_S (w0[2]);
+    w0[3] = hc_swap32_S (w0[3]);
+    w1[0] = hc_swap32_S (w1[0]);
+    w1[1] = hc_swap32_S (w1[1]);
+    w1[2] = hc_swap32_S (w1[2]);
+    w1[3] = hc_swap32_S (w1[3]);
+    w2[0] = hc_swap32_S (w2[0]);
+    w2[1] = hc_swap32_S (w2[1]);
+    w2[2] = hc_swap32_S (w2[2]);
+    w2[3] = hc_swap32_S (w2[3]);
+    w3[0] = hc_swap32_S (w3[0]);
+    w3[1] = hc_swap32_S (w3[1]);
+    w3[2] = hc_swap32_S (w3[2]);
+    w3[3] = hc_swap32_S (w3[3]);
+
+    ripemd320_update_64 (ctx, w0, w1, w2, w3, 32 * 2);
+  }
+  // tail: always reads 8 words, even if fewer bytes remain - NOTE(review): presumably w is readable and zero-padded that far, confirm
+  w0[0] = w[pos4 + 0];
+  w0[1] = w[pos4 + 1];
+  w0[2] = w[pos4 + 2];
+  w0[3] = w[pos4 + 3];
+  w1[0] = w[pos4 + 4];
+  w1[1] = w[pos4 + 5];
+  w1[2] = w[pos4 + 6];
+  w1[3] = w[pos4 + 7];
+
+  make_utf16le_S (w1, w2, w3);
+  make_utf16le_S (w0, w0, w1);
+
+  w0[0] = hc_swap32_S (w0[0]);
+  w0[1] = hc_swap32_S (w0[1]);
+  w0[2] = hc_swap32_S (w0[2]);
+  w0[3] = hc_swap32_S (w0[3]);
+  w1[0] = hc_swap32_S (w1[0]);
+  w1[1] = hc_swap32_S (w1[1]);
+  w1[2] = hc_swap32_S (w1[2]);
+  w1[3] = hc_swap32_S (w1[3]);
+  w2[0] = hc_swap32_S (w2[0]);
+  w2[1] = hc_swap32_S (w2[1]);
+  w2[2] = hc_swap32_S (w2[2]);
+  w2[3] = hc_swap32_S (w2[3]);
+  w3[0] = hc_swap32_S (w3[0]);
+  w3[1] = hc_swap32_S (w3[1]);
+  w3[2] = hc_swap32_S (w3[2]);
+  w3[3] = hc_swap32_S (w3[3]);
+  // remaining input bytes, doubled by the widening
+  ripemd320_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2);
+}
+
+DECLSPEC void ripemd320_final (PRIVATE_AS ripemd320_ctx_t *ctx)
+{ // Finish the hash: pad the pending block, append the message length and run the last transform(s). The digest is left in ctx->h[0..9].
+  const int pos = ctx->len & 63;
+  // helper defined elsewhere; going by its name it writes the 0x80 padding byte at byte offset pos
+  append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos);
+  // the length goes into w3[2..3] (bytes 56..63); if the data reaches that far, flush this block and use a fresh one
+  if (pos >= 56)
+  {
+    ripemd320_transform (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h);
+
+    ctx->w0[0] = 0;
+    ctx->w0[1] = 0;
+    ctx->w0[2] = 0;
+    ctx->w0[3] = 0;
+    ctx->w1[0] = 0;
+    ctx->w1[1] = 0;
+    ctx->w1[2] = 0;
+    ctx->w1[3] = 0;
+    ctx->w2[0] = 0;
+    ctx->w2[1] = 0;
+    ctx->w2[2] = 0;
+    ctx->w2[3] = 0;
+    ctx->w3[0] = 0;
+    ctx->w3[1] = 0;
+    ctx->w3[2] = 0;
+    ctx->w3[3] = 0;
+  }
+  // message length in bits: low word from len, high word always written as 0
+  ctx->w3[2] = ctx->len * 8;
+  ctx->w3[3] = 0;
+
+  ripemd320_transform (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h);
+}
+
+// ripemd320_hmac
+
+DECLSPEC void ripemd320_hmac_init_64 (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, PRIVATE_AS const u32 *w0, PRIVATE_AS const u32 *w1, PRIVATE_AS const u32 *w2, PRIVATE_AS const u32 *w3)
+{ // Set up both HMAC contexts from a 64-byte key block w0..w3: ctx->ipad absorbs key ^ 0x36.., ctx->opad absorbs key ^ 0x5c..
+  u32 a0[4];
+  u32 a1[4];
+  u32 a2[4];
+  u32 a3[4];
+
+  // ipad: key block with every byte XORed with 0x36, absorbed as the first block of the inner hash
+
+  a0[0] = w0[0] ^ 0x36363636;
+  a0[1] = w0[1] ^ 0x36363636;
+  a0[2] = w0[2] ^ 0x36363636;
+  a0[3] = w0[3] ^ 0x36363636;
+  a1[0] = w1[0] ^ 0x36363636;
+  a1[1] = w1[1] ^ 0x36363636;
+  a1[2] = w1[2] ^ 0x36363636;
+  a1[3] = w1[3] ^ 0x36363636;
+  a2[0] = w2[0] ^ 0x36363636;
+  a2[1] = w2[1] ^ 0x36363636;
+  a2[2] = w2[2] ^ 0x36363636;
+  a2[3] = w2[3] ^ 0x36363636;
+  a3[0] = w3[0] ^ 0x36363636;
+  a3[1] = w3[1] ^ 0x36363636;
+  a3[2] = w3[2] ^ 0x36363636;
+  a3[3] = w3[3] ^ 0x36363636;
+
+  ripemd320_init (&ctx->ipad);
+  // a full 64-byte block on an empty context: transformed immediately, ipad.len becomes 64
+  ripemd320_update_64 (&ctx->ipad, a0, a1, a2, a3, 64);
+
+  // opad: key block with every byte XORed with 0x5c, absorbed as the first block of the outer hash
+
+  u32 b0[4];
+  u32 b1[4];
+  u32 b2[4];
+  u32 b3[4];
+
+  b0[0] = w0[0] ^ 0x5c5c5c5c;
+  b0[1] = w0[1] ^ 0x5c5c5c5c;
+  b0[2] = w0[2] ^ 0x5c5c5c5c;
+  b0[3] = w0[3] ^ 0x5c5c5c5c;
+  b1[0] = w1[0] ^ 0x5c5c5c5c;
+  b1[1] = w1[1] ^ 0x5c5c5c5c;
+  b1[2] = w1[2] ^ 0x5c5c5c5c;
+  b1[3] = w1[3] ^ 0x5c5c5c5c;
+  b2[0] = w2[0] ^ 0x5c5c5c5c;
+  b2[1] = w2[1] ^ 0x5c5c5c5c;
+  b2[2] = w2[2] ^ 0x5c5c5c5c;
+  b2[3] = w2[3] ^ 0x5c5c5c5c;
+  b3[0] = w3[0] ^ 0x5c5c5c5c;
+  b3[1] = w3[1] ^ 0x5c5c5c5c;
+  b3[2] = w3[2] ^ 0x5c5c5c5c;
+  b3[3] = w3[3] ^ 0x5c5c5c5c;
+
+  ripemd320_init (&ctx->opad);
+  // a full 64-byte block on an empty context: transformed immediately, opad.len becomes 64
+  ripemd320_update_64 (&ctx->opad, b0, b1, b2, b3, 64);
+}
+
+DECLSPEC void ripemd320_hmac_init (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len)
+{ // Start an HMAC with a key of len bytes in w: keys longer than one block are hashed first, shorter ones are used directly.
+  u32 w0[4];
+  u32 w1[4];
+  u32 w2[4];
+  u32 w3[4];
+  // key longer than the 64-byte block: replace it by its 40-byte RIPEMD-320 digest
+  if (len > 64)
+  {
+    ripemd320_ctx_t tmp;
+
+    ripemd320_init (&tmp);
+
+    ripemd320_update (&tmp, w, len);
+
+    ripemd320_final (&tmp);
+    // ten digest words followed by six zero words make up the 64-byte key block
+    w0[0] = tmp.h[0];
+    w0[1] = tmp.h[1];
+    w0[2] = tmp.h[2];
+    w0[3] = tmp.h[3];
+    w1[0] = tmp.h[4];
+    w1[1] = tmp.h[5];
+    w1[2] = tmp.h[6];
+    w1[3] = tmp.h[7];
+    w2[0] = tmp.h[8];
+    w2[1] = tmp.h[9];
+    w2[2] = 0;
+    w2[3] = 0;
+    w3[0] = 0;
+    w3[1] = 0;
+    w3[2] = 0;
+    w3[3] = 0;
+  }
+  else
+  {
+    w0[0] = w[ 0]; // all 16 words are read whatever len is
+    w0[1] = w[ 1]; // NOTE(review): presumably w is zero-padded to 64 bytes by the caller - confirm
+    w0[2] = w[ 2];
+    w0[3] = w[ 3];
+    w1[0] = w[ 4];
+    w1[1] = w[ 5];
+    w1[2] = w[ 6];
+    w1[3] = w[ 7];
+    w2[0] = w[ 8];
+    w2[1] = w[ 9];
+    w2[2] = w[10];
+    w2[3] = w[11];
+    w3[0] = w[12];
+    w3[1] = w[13];
+    w3[2] = w[14];
+    w3[3] = w[15];
+  }
+
+  ripemd320_hmac_init_64 (ctx, w0, w1, w2, w3);
+}
+
+DECLSPEC void ripemd320_hmac_init_swap (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len)
+{ // Like ripemd320_hmac_init, but the key words in w are passed through hc_swap32_S before use.
+  u32 w0[4];
+  u32 w1[4];
+  u32 w2[4];
+  u32 w3[4];
+  // key longer than the 64-byte block: replace it by its 40-byte RIPEMD-320 digest
+  if (len > 64)
+  {
+    ripemd320_ctx_t tmp;
+
+    ripemd320_init (&tmp);
+
+    ripemd320_update_swap (&tmp, w, len);
+
+    ripemd320_final (&tmp);
+    // the digest words are used as produced (no swap here); six zero words pad the key block to 64 bytes
+    w0[0] = tmp.h[0];
+    w0[1] = tmp.h[1];
+    w0[2] = tmp.h[2];
+    w0[3] = tmp.h[3];
+    w1[0] = tmp.h[4];
+    w1[1] = tmp.h[5];
+    w1[2] = tmp.h[6];
+    w1[3] = tmp.h[7];
+    w2[0] = tmp.h[8];
+    w2[1] = tmp.h[9];
+    w2[2] = 0;
+    w2[3] = 0;
+    w3[0] = 0;
+    w3[1] = 0;
+    w3[2] = 0;
+    w3[3] = 0;
+  }
+  else
+  {
+    w0[0] = hc_swap32_S (w[ 0]); // all 16 words are read whatever len is
+    w0[1] = hc_swap32_S (w[ 1]); // NOTE(review): presumably w is zero-padded to 64 bytes by the caller - confirm
+    w0[2] = hc_swap32_S (w[ 2]);
+    w0[3] = hc_swap32_S (w[ 3]);
+    w1[0] = hc_swap32_S (w[ 4]);
+    w1[1] = hc_swap32_S (w[ 5]);
+    w1[2] = hc_swap32_S (w[ 6]);
+    w1[3] = hc_swap32_S (w[ 7]);
+    w2[0] = hc_swap32_S (w[ 8]);
+    w2[1] = hc_swap32_S (w[ 9]);
+    w2[2] = hc_swap32_S (w[10]);
+    w2[3] = hc_swap32_S (w[11]);
+    w3[0] = hc_swap32_S (w[12]);
+    w3[1] = hc_swap32_S (w[13]);
+    w3[2] = hc_swap32_S (w[14]);
+    w3[3] = hc_swap32_S (w[15]);
+  }
+
+  ripemd320_hmac_init_64 (ctx, w0, w1, w2, w3);
+}
+
+DECLSPEC void ripemd320_hmac_init_global (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len)
+{ // Same as ripemd320_hmac_init, except that the key w is a GLOBAL_AS pointer.
+  u32 w0[4];
+  u32 w1[4];
+  u32 w2[4];
+  u32 w3[4];
+  // key longer than the 64-byte block: replace it by its 40-byte RIPEMD-320 digest
+  if (len > 64)
+  {
+    ripemd320_ctx_t tmp;
+
+    ripemd320_init (&tmp);
+
+    ripemd320_update_global (&tmp, w, len);
+
+    ripemd320_final (&tmp);
+    // ten digest words followed by six zero words make up the 64-byte key block
+    w0[0] = tmp.h[0];
+    w0[1] = tmp.h[1];
+    w0[2] = tmp.h[2];
+    w0[3] = tmp.h[3];
+    w1[0] = tmp.h[4];
+    w1[1] = tmp.h[5];
+    w1[2] = tmp.h[6];
+    w1[3] = tmp.h[7];
+    w2[0] = tmp.h[8];
+    w2[1] = tmp.h[9];
+    w2[2] = 0;
+    w2[3] = 0;
+    w3[0] = 0;
+    w3[1] = 0;
+    w3[2] = 0;
+    w3[3] = 0;
+  }
+  else
+  {
+    w0[0] = w[ 0]; // all 16 words are read whatever len is
+    w0[1] = w[ 1]; // NOTE(review): presumably w is zero-padded to 64 bytes by the caller - confirm
+    w0[2] = w[ 2];
+    w0[3] = w[ 3];
+    w1[0] = w[ 4];
+    w1[1] = w[ 5];
+    w1[2] = w[ 6];
+    w1[3] = w[ 7];
+    w2[0] = w[ 8];
+    w2[1] = w[ 9];
+    w2[2] = w[10];
+    w2[3] = w[11];
+    w3[0] = w[12];
+    w3[1] = w[13];
+    w3[2] = w[14];
+    w3[3] = w[15];
+  }
+
+  ripemd320_hmac_init_64 (ctx, w0, w1, w2, w3);
+}
+
+DECLSPEC void ripemd320_hmac_init_global_swap (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len)
+{ // Same as ripemd320_hmac_init_swap, except that the key w is a GLOBAL_AS pointer.
+  u32 w0[4];
+  u32 w1[4];
+  u32 w2[4];
+  u32 w3[4];
+  // key longer than the 64-byte block: replace it by its 40-byte RIPEMD-320 digest
+  if (len > 64)
+  {
+    ripemd320_ctx_t tmp;
+
+    ripemd320_init (&tmp);
+
+    ripemd320_update_global_swap (&tmp, w, len);
+
+    ripemd320_final (&tmp);
+    // the digest words are used as produced (no swap here); six zero words pad the key block to 64 bytes
+    w0[0] = tmp.h[0];
+    w0[1] = tmp.h[1];
+    w0[2] = tmp.h[2];
+    w0[3] = tmp.h[3];
+    w1[0] = tmp.h[4];
+    w1[1] = tmp.h[5];
+    w1[2] = tmp.h[6];
+    w1[3] = tmp.h[7];
+    w2[0] = tmp.h[8];
+    w2[1] = tmp.h[9];
+    w2[2] = 0;
+    w2[3] = 0;
+    w3[0] = 0;
+    w3[1] = 0;
+    w3[2] = 0;
+    w3[3] = 0;
+  }
+  else
+  {
+    w0[0] = hc_swap32_S (w[ 0]); // all 16 words are read whatever len is
+    w0[1] = hc_swap32_S (w[ 1]); // NOTE(review): presumably w is zero-padded to 64 bytes by the caller - confirm
+    w0[2] = hc_swap32_S (w[ 2]);
+    w0[3] = hc_swap32_S (w[ 3]);
+    w1[0] = hc_swap32_S (w[ 4]);
+    w1[1] = hc_swap32_S (w[ 5]);
+    w1[2] = hc_swap32_S (w[ 6]);
+    w1[3] = hc_swap32_S (w[ 7]);
+    w2[0] = hc_swap32_S (w[ 8]);
+    w2[1] = hc_swap32_S (w[ 9]);
+    w2[2] = hc_swap32_S (w[10]);
+    w2[3] = hc_swap32_S (w[11]);
+    w3[0] = hc_swap32_S (w[12]);
+    w3[1] = hc_swap32_S (w[13]);
+    w3[2] = hc_swap32_S (w[14]);
+    w3[3] = hc_swap32_S (w[15]);
+  }
+
+  ripemd320_hmac_init_64 (ctx, w0, w1, w2, w3);
+}
+
+DECLSPEC void ripemd320_hmac_update_64 (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const int len)
+{ // HMAC message data goes into the inner (ipad) context only; this forwards one block's worth of words to ripemd320_update_64.
+  ripemd320_update_64 (&ctx->ipad, w0, w1, w2, w3, len);
+}
+
+DECLSPEC void ripemd320_hmac_update (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len)
+{ // Absorb len message bytes from w into the inner (ipad) context.
+  ripemd320_update (&ctx->ipad, w, len);
+}
+
+DECLSPEC void ripemd320_hmac_update_swap (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len)
+{ // Absorb len message bytes from w into the inner (ipad) context, using the word-swapping update variant.
+  ripemd320_update_swap (&ctx->ipad, w, len);
+}
+
+DECLSPEC void ripemd320_hmac_update_utf16le (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len)
+{ // Absorb len message bytes from w into the inner (ipad) context, using the UTF-16LE widening update variant.
+  ripemd320_update_utf16le (&ctx->ipad, w, len);
+}
+
+DECLSPEC void ripemd320_hmac_update_utf16le_swap (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len)
+{ // Absorb len message bytes from w into the inner (ipad) context, using the UTF-16LE widening plus word-swapping update variant.
+  ripemd320_update_utf16le_swap (&ctx->ipad, w, len);
+}
+
+DECLSPEC void ripemd320_hmac_update_global (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len)
+{ // HMAC message update for a GLOBAL_AS input pointer: forwards w/len to ripemd320_update_global on the inner (ipad) hash only
+  ripemd320_update_global (&ctx->ipad, w, len);
+}
+
+DECLSPEC void ripemd320_hmac_update_global_swap (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len)
+{ // HMAC message update for a GLOBAL_AS input pointer: forwards w/len to ripemd320_update_global_swap on the inner (ipad) hash only
+  ripemd320_update_global_swap (&ctx->ipad, w, len);
+}
+
+DECLSPEC void ripemd320_hmac_update_global_utf16le (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len)
+{ // HMAC message update for a GLOBAL_AS input pointer: forwards w/len to ripemd320_update_global_utf16le on the inner (ipad) hash only
+  ripemd320_update_global_utf16le (&ctx->ipad, w, len);
+}
+
+DECLSPEC void ripemd320_hmac_update_global_utf16le_swap (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len)
+{ // HMAC message update for a GLOBAL_AS input pointer: forwards w/len to ripemd320_update_global_utf16le_swap on the inner (ipad) hash only
+  ripemd320_update_global_utf16le_swap (&ctx->ipad, w, len);
+}
+
+DECLSPEC void ripemd320_hmac_final (PRIVATE_AS ripemd320_hmac_ctx_t *ctx)
+{ // HMAC finish: finalizes the inner (ipad) hash, then feeds its 10 state words to the outer (opad) hash and finalizes that
+  ripemd320_final (&ctx->ipad);
+
+  ctx->opad.w0[0] = ctx->ipad.h[0]; // inner state h[0..9] becomes the pending message words of the outer hash
+  ctx->opad.w0[1] = ctx->ipad.h[1]; // NOTE(review): the direct overwrite presumes nothing is pending in opad's buffer - confirm against the hmac init code
+  ctx->opad.w0[2] = ctx->ipad.h[2];
+  ctx->opad.w0[3] = ctx->ipad.h[3];
+  ctx->opad.w1[0] = ctx->ipad.h[4];
+  ctx->opad.w1[1] = ctx->ipad.h[5];
+  ctx->opad.w1[2] = ctx->ipad.h[6];
+  ctx->opad.w1[3] = ctx->ipad.h[7];
+  ctx->opad.w2[0] = ctx->ipad.h[8];
+  ctx->opad.w2[1] = ctx->ipad.h[9];
+  ctx->opad.w2[2] = 0; // the remaining 6 of the 16 buffer words are cleared
+  ctx->opad.w2[3] = 0;
+  ctx->opad.w3[0] = 0;
+  ctx->opad.w3[1] = 0;
+  ctx->opad.w3[2] = 0;
+  ctx->opad.w3[3] = 0;
+
+  ctx->opad.len += 40; // accounts for the 10 words * 4 bytes stored above
+
+  ripemd320_final (&ctx->opad); // NOTE(review): the HMAC result presumably ends up in ctx->opad.h - ripemd320_final is defined outside this view
+}
+
+// Vector variants: the input buffer may be a vector datatype (u32x), but the length is a single scalar shared by all vector elements
+
+DECLSPEC void ripemd320_transform_vector (PRIVATE_AS const u32x *w0, PRIVATE_AS const u32x *w1, PRIVATE_AS const u32x *w2, PRIVATE_AS const u32x *w3, PRIVATE_AS u32x *digest)
+{ // Compression step: mixes one 16-word block (w0..w3, 4 words each) into the 10-word state in digest, in place
+  u32x a1 = digest[0]; // working registers of the first line, taken from digest[0..4]
+  u32x b1 = digest[1];
+  u32x c1 = digest[2];
+  u32x d1 = digest[3];
+  u32x e1 = digest[4];
+
+  u32x a2 = digest[5]; // working registers of the second (parallel) line, taken from digest[5..9]
+  u32x b2 = digest[6];
+  u32x c2 = digest[7];
+  u32x d2 = digest[8];
+  u32x e2 = digest[9];
+
+  u32x tmp = 0; // scratch for the register exchanges between rounds
+  // round 1, first line: 16 steps of RIPEMD320_F, message words in natural order
+  RIPEMD320_STEP (RIPEMD320_F , a1, b1, c1, d1, e1, w0[0], RIPEMD320C00, RIPEMD320S00);
+  RIPEMD320_STEP (RIPEMD320_F , e1, a1, b1, c1, d1, w0[1], RIPEMD320C00, RIPEMD320S01);
+  RIPEMD320_STEP (RIPEMD320_F , d1, e1, a1, b1, c1, w0[2], RIPEMD320C00, RIPEMD320S02);
+  RIPEMD320_STEP (RIPEMD320_F , c1, d1, e1, a1, b1, w0[3], RIPEMD320C00, RIPEMD320S03);
+  RIPEMD320_STEP (RIPEMD320_F , b1, c1, d1, e1, a1, w1[0], RIPEMD320C00, RIPEMD320S04);
+  RIPEMD320_STEP (RIPEMD320_F , a1, b1, c1, d1, e1, w1[1], RIPEMD320C00, RIPEMD320S05);
+  RIPEMD320_STEP (RIPEMD320_F , e1, a1, b1, c1, d1, w1[2], RIPEMD320C00, RIPEMD320S06);
+  RIPEMD320_STEP (RIPEMD320_F , d1, e1, a1, b1, c1, w1[3], RIPEMD320C00, RIPEMD320S07);
+  RIPEMD320_STEP (RIPEMD320_F , c1, d1, e1, a1, b1, w2[0], RIPEMD320C00, RIPEMD320S08);
+  RIPEMD320_STEP (RIPEMD320_F , b1, c1, d1, e1, a1, w2[1], RIPEMD320C00, RIPEMD320S09);
+  RIPEMD320_STEP (RIPEMD320_F , a1, b1, c1, d1, e1, w2[2], RIPEMD320C00, RIPEMD320S0A);
+  RIPEMD320_STEP (RIPEMD320_F , e1, a1, b1, c1, d1, w2[3], RIPEMD320C00, RIPEMD320S0B);
+  RIPEMD320_STEP (RIPEMD320_F , d1, e1, a1, b1, c1, w3[0], RIPEMD320C00, RIPEMD320S0C);
+  RIPEMD320_STEP (RIPEMD320_F , c1, d1, e1, a1, b1, w3[1], RIPEMD320C00, RIPEMD320S0D);
+  RIPEMD320_STEP (RIPEMD320_F , b1, c1, d1, e1, a1, w3[2], RIPEMD320C00, RIPEMD320S0E);
+  RIPEMD320_STEP (RIPEMD320_F , a1, b1, c1, d1, e1, w3[3], RIPEMD320C00, RIPEMD320S0F);
+  // round 1, second line: 16 steps of RIPEMD320_J, permuted message words
+  RIPEMD320_STEP (RIPEMD320_J , a2, b2, c2, d2, e2, w1[1], RIPEMD320C50, RIPEMD320S50);
+  RIPEMD320_STEP (RIPEMD320_J , e2, a2, b2, c2, d2, w3[2], RIPEMD320C50, RIPEMD320S51);
+  RIPEMD320_STEP (RIPEMD320_J , d2, e2, a2, b2, c2, w1[3], RIPEMD320C50, RIPEMD320S52);
+  RIPEMD320_STEP (RIPEMD320_J , c2, d2, e2, a2, b2, w0[0], RIPEMD320C50, RIPEMD320S53);
+  RIPEMD320_STEP (RIPEMD320_J , b2, c2, d2, e2, a2, w2[1], RIPEMD320C50, RIPEMD320S54);
+  RIPEMD320_STEP (RIPEMD320_J , a2, b2, c2, d2, e2, w0[2], RIPEMD320C50, RIPEMD320S55);
+  RIPEMD320_STEP (RIPEMD320_J , e2, a2, b2, c2, d2, w2[3], RIPEMD320C50, RIPEMD320S56);
+  RIPEMD320_STEP (RIPEMD320_J , d2, e2, a2, b2, c2, w1[0], RIPEMD320C50, RIPEMD320S57);
+  RIPEMD320_STEP (RIPEMD320_J , c2, d2, e2, a2, b2, w3[1], RIPEMD320C50, RIPEMD320S58);
+  RIPEMD320_STEP (RIPEMD320_J , b2, c2, d2, e2, a2, w1[2], RIPEMD320C50, RIPEMD320S59);
+  RIPEMD320_STEP (RIPEMD320_J , a2, b2, c2, d2, e2, w3[3], RIPEMD320C50, RIPEMD320S5A);
+  RIPEMD320_STEP (RIPEMD320_J , e2, a2, b2, c2, d2, w2[0], RIPEMD320C50, RIPEMD320S5B);
+  RIPEMD320_STEP (RIPEMD320_J , d2, e2, a2, b2, c2, w0[1], RIPEMD320C50, RIPEMD320S5C);
+  RIPEMD320_STEP (RIPEMD320_J , c2, d2, e2, a2, b2, w2[2], RIPEMD320C50, RIPEMD320S5D);
+  RIPEMD320_STEP (RIPEMD320_J , b2, c2, d2, e2, a2, w0[3], RIPEMD320C50, RIPEMD320S5E);
+  RIPEMD320_STEP (RIPEMD320_J , a2, b2, c2, d2, e2, w3[0], RIPEMD320C50, RIPEMD320S5F);
+
+  tmp = a1; a1 = a2; a2 = tmp; // exchange the a registers of the two lines before round 2
+  // round 2, first line: RIPEMD320_Go
+  RIPEMD320_STEP (RIPEMD320_Go, e1, a1, b1, c1, d1, w1[3], RIPEMD320C10, RIPEMD320S10);
+  RIPEMD320_STEP (RIPEMD320_Go, d1, e1, a1, b1, c1, w1[0], RIPEMD320C10, RIPEMD320S11);
+  RIPEMD320_STEP (RIPEMD320_Go, c1, d1, e1, a1, b1, w3[1], RIPEMD320C10, RIPEMD320S12);
+  RIPEMD320_STEP (RIPEMD320_Go, b1, c1, d1, e1, a1, w0[1], RIPEMD320C10, RIPEMD320S13);
+  RIPEMD320_STEP (RIPEMD320_Go, a1, b1, c1, d1, e1, w2[2], RIPEMD320C10, RIPEMD320S14);
+  RIPEMD320_STEP (RIPEMD320_Go, e1, a1, b1, c1, d1, w1[2], RIPEMD320C10, RIPEMD320S15);
+  RIPEMD320_STEP (RIPEMD320_Go, d1, e1, a1, b1, c1, w3[3], RIPEMD320C10, RIPEMD320S16);
+  RIPEMD320_STEP (RIPEMD320_Go, c1, d1, e1, a1, b1, w0[3], RIPEMD320C10, RIPEMD320S17);
+  RIPEMD320_STEP (RIPEMD320_Go, b1, c1, d1, e1, a1, w3[0], RIPEMD320C10, RIPEMD320S18);
+  RIPEMD320_STEP (RIPEMD320_Go, a1, b1, c1, d1, e1, w0[0], RIPEMD320C10, RIPEMD320S19);
+  RIPEMD320_STEP (RIPEMD320_Go, e1, a1, b1, c1, d1, w2[1], RIPEMD320C10, RIPEMD320S1A);
+  RIPEMD320_STEP (RIPEMD320_Go, d1, e1, a1, b1, c1, w1[1], RIPEMD320C10, RIPEMD320S1B);
+  RIPEMD320_STEP (RIPEMD320_Go, c1, d1, e1, a1, b1, w0[2], RIPEMD320C10, RIPEMD320S1C);
+  RIPEMD320_STEP (RIPEMD320_Go, b1, c1, d1, e1, a1, w3[2], RIPEMD320C10, RIPEMD320S1D);
+  RIPEMD320_STEP (RIPEMD320_Go, a1, b1, c1, d1, e1, w2[3], RIPEMD320C10, RIPEMD320S1E);
+  RIPEMD320_STEP (RIPEMD320_Go, e1, a1, b1, c1, d1, w2[0], RIPEMD320C10, RIPEMD320S1F);
+  // round 2, second line: RIPEMD320_Io
+  RIPEMD320_STEP (RIPEMD320_Io, e2, a2, b2, c2, d2, w1[2], RIPEMD320C60, RIPEMD320S60);
+  RIPEMD320_STEP (RIPEMD320_Io, d2, e2, a2, b2, c2, w2[3], RIPEMD320C60, RIPEMD320S61);
+  RIPEMD320_STEP (RIPEMD320_Io, c2, d2, e2, a2, b2, w0[3], RIPEMD320C60, RIPEMD320S62);
+  RIPEMD320_STEP (RIPEMD320_Io, b2, c2, d2, e2, a2, w1[3], RIPEMD320C60, RIPEMD320S63);
+  RIPEMD320_STEP (RIPEMD320_Io, a2, b2, c2, d2, e2, w0[0], RIPEMD320C60, RIPEMD320S64);
+  RIPEMD320_STEP (RIPEMD320_Io, e2, a2, b2, c2, d2, w3[1], RIPEMD320C60, RIPEMD320S65);
+  RIPEMD320_STEP (RIPEMD320_Io, d2, e2, a2, b2, c2, w1[1], RIPEMD320C60, RIPEMD320S66);
+  RIPEMD320_STEP (RIPEMD320_Io, c2, d2, e2, a2, b2, w2[2], RIPEMD320C60, RIPEMD320S67);
+  RIPEMD320_STEP (RIPEMD320_Io, b2, c2, d2, e2, a2, w3[2], RIPEMD320C60, RIPEMD320S68);
+  RIPEMD320_STEP (RIPEMD320_Io, a2, b2, c2, d2, e2, w3[3], RIPEMD320C60, RIPEMD320S69);
+  RIPEMD320_STEP (RIPEMD320_Io, e2, a2, b2, c2, d2, w2[0], RIPEMD320C60, RIPEMD320S6A);
+  RIPEMD320_STEP (RIPEMD320_Io, d2, e2, a2, b2, c2, w3[0], RIPEMD320C60, RIPEMD320S6B);
+  RIPEMD320_STEP (RIPEMD320_Io, c2, d2, e2, a2, b2, w1[0], RIPEMD320C60, RIPEMD320S6C);
+  RIPEMD320_STEP (RIPEMD320_Io, b2, c2, d2, e2, a2, w2[1], RIPEMD320C60, RIPEMD320S6D);
+  RIPEMD320_STEP (RIPEMD320_Io, a2, b2, c2, d2, e2, w0[1], RIPEMD320C60, RIPEMD320S6E);
+  RIPEMD320_STEP (RIPEMD320_Io, e2, a2, b2, c2, d2, w0[2], RIPEMD320C60, RIPEMD320S6F);
+
+  tmp = b1; b1 = b2; b2 = tmp; // exchange the b registers of the two lines before round 3
+  // round 3, first line: RIPEMD320_H
+  RIPEMD320_STEP (RIPEMD320_H , d1, e1, a1, b1, c1, w0[3], RIPEMD320C20, RIPEMD320S20);
+  RIPEMD320_STEP (RIPEMD320_H , c1, d1, e1, a1, b1, w2[2], RIPEMD320C20, RIPEMD320S21);
+  RIPEMD320_STEP (RIPEMD320_H , b1, c1, d1, e1, a1, w3[2], RIPEMD320C20, RIPEMD320S22);
+  RIPEMD320_STEP (RIPEMD320_H , a1, b1, c1, d1, e1, w1[0], RIPEMD320C20, RIPEMD320S23);
+  RIPEMD320_STEP (RIPEMD320_H , e1, a1, b1, c1, d1, w2[1], RIPEMD320C20, RIPEMD320S24);
+  RIPEMD320_STEP (RIPEMD320_H , d1, e1, a1, b1, c1, w3[3], RIPEMD320C20, RIPEMD320S25);
+  RIPEMD320_STEP (RIPEMD320_H , c1, d1, e1, a1, b1, w2[0], RIPEMD320C20, RIPEMD320S26);
+  RIPEMD320_STEP (RIPEMD320_H , b1, c1, d1, e1, a1, w0[1], RIPEMD320C20, RIPEMD320S27);
+  RIPEMD320_STEP (RIPEMD320_H , a1, b1, c1, d1, e1, w0[2], RIPEMD320C20, RIPEMD320S28);
+  RIPEMD320_STEP (RIPEMD320_H , e1, a1, b1, c1, d1, w1[3], RIPEMD320C20, RIPEMD320S29);
+  RIPEMD320_STEP (RIPEMD320_H , d1, e1, a1, b1, c1, w0[0], RIPEMD320C20, RIPEMD320S2A);
+  RIPEMD320_STEP (RIPEMD320_H , c1, d1, e1, a1, b1, w1[2], RIPEMD320C20, RIPEMD320S2B);
+  RIPEMD320_STEP (RIPEMD320_H , b1, c1, d1, e1, a1, w3[1], RIPEMD320C20, RIPEMD320S2C);
+  RIPEMD320_STEP (RIPEMD320_H , a1, b1, c1, d1, e1, w2[3], RIPEMD320C20, RIPEMD320S2D);
+  RIPEMD320_STEP (RIPEMD320_H , e1, a1, b1, c1, d1, w1[1], RIPEMD320C20, RIPEMD320S2E);
+  RIPEMD320_STEP (RIPEMD320_H , d1, e1, a1, b1, c1, w3[0], RIPEMD320C20, RIPEMD320S2F);
+  // round 3, second line: RIPEMD320_H as well
+  RIPEMD320_STEP (RIPEMD320_H , d2, e2, a2, b2, c2, w3[3], RIPEMD320C70, RIPEMD320S70);
+  RIPEMD320_STEP (RIPEMD320_H , c2, d2, e2, a2, b2, w1[1], RIPEMD320C70, RIPEMD320S71);
+  RIPEMD320_STEP (RIPEMD320_H , b2, c2, d2, e2, a2, w0[1], RIPEMD320C70, RIPEMD320S72);
+  RIPEMD320_STEP (RIPEMD320_H , a2, b2, c2, d2, e2, w0[3], RIPEMD320C70, RIPEMD320S73);
+  RIPEMD320_STEP (RIPEMD320_H , e2, a2, b2, c2, d2, w1[3], RIPEMD320C70, RIPEMD320S74);
+  RIPEMD320_STEP (RIPEMD320_H , d2, e2, a2, b2, c2, w3[2], RIPEMD320C70, RIPEMD320S75);
+  RIPEMD320_STEP (RIPEMD320_H , c2, d2, e2, a2, b2, w1[2], RIPEMD320C70, RIPEMD320S76);
+  RIPEMD320_STEP (RIPEMD320_H , b2, c2, d2, e2, a2, w2[1], RIPEMD320C70, RIPEMD320S77);
+  RIPEMD320_STEP (RIPEMD320_H , a2, b2, c2, d2, e2, w2[3], RIPEMD320C70, RIPEMD320S78);
+  RIPEMD320_STEP (RIPEMD320_H , e2, a2, b2, c2, d2, w2[0], RIPEMD320C70, RIPEMD320S79);
+  RIPEMD320_STEP (RIPEMD320_H , d2, e2, a2, b2, c2, w3[0], RIPEMD320C70, RIPEMD320S7A);
+  RIPEMD320_STEP (RIPEMD320_H , c2, d2, e2, a2, b2, w0[2], RIPEMD320C70, RIPEMD320S7B);
+  RIPEMD320_STEP (RIPEMD320_H , b2, c2, d2, e2, a2, w2[2], RIPEMD320C70, RIPEMD320S7C);
+  RIPEMD320_STEP (RIPEMD320_H , a2, b2, c2, d2, e2, w0[0], RIPEMD320C70, RIPEMD320S7D);
+  RIPEMD320_STEP (RIPEMD320_H , e2, a2, b2, c2, d2, w1[0], RIPEMD320C70, RIPEMD320S7E);
+  RIPEMD320_STEP (RIPEMD320_H , d2, e2, a2, b2, c2, w3[1], RIPEMD320C70, RIPEMD320S7F);
+
+  tmp = c1; c1 = c2; c2 = tmp; // exchange the c registers of the two lines before round 4
+  // round 4, first line: RIPEMD320_Io
+  RIPEMD320_STEP (RIPEMD320_Io, c1, d1, e1, a1, b1, w0[1], RIPEMD320C30, RIPEMD320S30);
+  RIPEMD320_STEP (RIPEMD320_Io, b1, c1, d1, e1, a1, w2[1], RIPEMD320C30, RIPEMD320S31);
+  RIPEMD320_STEP (RIPEMD320_Io, a1, b1, c1, d1, e1, w2[3], RIPEMD320C30, RIPEMD320S32);
+  RIPEMD320_STEP (RIPEMD320_Io, e1, a1, b1, c1, d1, w2[2], RIPEMD320C30, RIPEMD320S33);
+  RIPEMD320_STEP (RIPEMD320_Io, d1, e1, a1, b1, c1, w0[0], RIPEMD320C30, RIPEMD320S34);
+  RIPEMD320_STEP (RIPEMD320_Io, c1, d1, e1, a1, b1, w2[0], RIPEMD320C30, RIPEMD320S35);
+  RIPEMD320_STEP (RIPEMD320_Io, b1, c1, d1, e1, a1, w3[0], RIPEMD320C30, RIPEMD320S36);
+  RIPEMD320_STEP (RIPEMD320_Io, a1, b1, c1, d1, e1, w1[0], RIPEMD320C30, RIPEMD320S37);
+  RIPEMD320_STEP (RIPEMD320_Io, e1, a1, b1, c1, d1, w3[1], RIPEMD320C30, RIPEMD320S38);
+  RIPEMD320_STEP (RIPEMD320_Io, d1, e1, a1, b1, c1, w0[3], RIPEMD320C30, RIPEMD320S39);
+  RIPEMD320_STEP (RIPEMD320_Io, c1, d1, e1, a1, b1, w1[3], RIPEMD320C30, RIPEMD320S3A);
+  RIPEMD320_STEP (RIPEMD320_Io, b1, c1, d1, e1, a1, w3[3], RIPEMD320C30, RIPEMD320S3B);
+  RIPEMD320_STEP (RIPEMD320_Io, a1, b1, c1, d1, e1, w3[2], RIPEMD320C30, RIPEMD320S3C);
+  RIPEMD320_STEP (RIPEMD320_Io, e1, a1, b1, c1, d1, w1[1], RIPEMD320C30, RIPEMD320S3D);
+  RIPEMD320_STEP (RIPEMD320_Io, d1, e1, a1, b1, c1, w1[2], RIPEMD320C30, RIPEMD320S3E);
+  RIPEMD320_STEP (RIPEMD320_Io, c1, d1, e1, a1, b1, w0[2], RIPEMD320C30, RIPEMD320S3F);
+  // round 4, second line: RIPEMD320_Go
+  RIPEMD320_STEP (RIPEMD320_Go, c2, d2, e2, a2, b2, w2[0], RIPEMD320C80, RIPEMD320S80);
+  RIPEMD320_STEP (RIPEMD320_Go, b2, c2, d2, e2, a2, w1[2], RIPEMD320C80, RIPEMD320S81);
+  RIPEMD320_STEP (RIPEMD320_Go, a2, b2, c2, d2, e2, w1[0], RIPEMD320C80, RIPEMD320S82);
+  RIPEMD320_STEP (RIPEMD320_Go, e2, a2, b2, c2, d2, w0[1], RIPEMD320C80, RIPEMD320S83);
+  RIPEMD320_STEP (RIPEMD320_Go, d2, e2, a2, b2, c2, w0[3], RIPEMD320C80, RIPEMD320S84);
+  RIPEMD320_STEP (RIPEMD320_Go, c2, d2, e2, a2, b2, w2[3], RIPEMD320C80, RIPEMD320S85);
+  RIPEMD320_STEP (RIPEMD320_Go, b2, c2, d2, e2, a2, w3[3], RIPEMD320C80, RIPEMD320S86);
+  RIPEMD320_STEP (RIPEMD320_Go, a2, b2, c2, d2, e2, w0[0], RIPEMD320C80, RIPEMD320S87);
+  RIPEMD320_STEP (RIPEMD320_Go, e2, a2, b2, c2, d2, w1[1], RIPEMD320C80, RIPEMD320S88);
+  RIPEMD320_STEP (RIPEMD320_Go, d2, e2, a2, b2, c2, w3[0], RIPEMD320C80, RIPEMD320S89);
+  RIPEMD320_STEP (RIPEMD320_Go, c2, d2, e2, a2, b2, w0[2], RIPEMD320C80, RIPEMD320S8A);
+  RIPEMD320_STEP (RIPEMD320_Go, b2, c2, d2, e2, a2, w3[1], RIPEMD320C80, RIPEMD320S8B);
+  RIPEMD320_STEP (RIPEMD320_Go, a2, b2, c2, d2, e2, w2[1], RIPEMD320C80, RIPEMD320S8C);
+  RIPEMD320_STEP (RIPEMD320_Go, e2, a2, b2, c2, d2, w1[3], RIPEMD320C80, RIPEMD320S8D);
+  RIPEMD320_STEP (RIPEMD320_Go, d2, e2, a2, b2, c2, w2[2], RIPEMD320C80, RIPEMD320S8E);
+  RIPEMD320_STEP (RIPEMD320_Go, c2, d2, e2, a2, b2, w3[2], RIPEMD320C80, RIPEMD320S8F);
+
+  tmp = d1; d1 = d2; d2 = tmp; // exchange the d registers of the two lines before round 5
+  // round 5, first line: RIPEMD320_J
+  RIPEMD320_STEP (RIPEMD320_J , b1, c1, d1, e1, a1, w1[0], RIPEMD320C40, RIPEMD320S40);
+  RIPEMD320_STEP (RIPEMD320_J , a1, b1, c1, d1, e1, w0[0], RIPEMD320C40, RIPEMD320S41);
+  RIPEMD320_STEP (RIPEMD320_J , e1, a1, b1, c1, d1, w1[1], RIPEMD320C40, RIPEMD320S42);
+  RIPEMD320_STEP (RIPEMD320_J , d1, e1, a1, b1, c1, w2[1], RIPEMD320C40, RIPEMD320S43);
+  RIPEMD320_STEP (RIPEMD320_J , c1, d1, e1, a1, b1, w1[3], RIPEMD320C40, RIPEMD320S44);
+  RIPEMD320_STEP (RIPEMD320_J , b1, c1, d1, e1, a1, w3[0], RIPEMD320C40, RIPEMD320S45);
+  RIPEMD320_STEP (RIPEMD320_J , a1, b1, c1, d1, e1, w0[2], RIPEMD320C40, RIPEMD320S46);
+  RIPEMD320_STEP (RIPEMD320_J , e1, a1, b1, c1, d1, w2[2], RIPEMD320C40, RIPEMD320S47);
+  RIPEMD320_STEP (RIPEMD320_J , d1, e1, a1, b1, c1, w3[2], RIPEMD320C40, RIPEMD320S48);
+  RIPEMD320_STEP (RIPEMD320_J , c1, d1, e1, a1, b1, w0[1], RIPEMD320C40, RIPEMD320S49);
+  RIPEMD320_STEP (RIPEMD320_J , b1, c1, d1, e1, a1, w0[3], RIPEMD320C40, RIPEMD320S4A);
+  RIPEMD320_STEP (RIPEMD320_J , a1, b1, c1, d1, e1, w2[0], RIPEMD320C40, RIPEMD320S4B);
+  RIPEMD320_STEP (RIPEMD320_J , e1, a1, b1, c1, d1, w2[3], RIPEMD320C40, RIPEMD320S4C);
+  RIPEMD320_STEP (RIPEMD320_J , d1, e1, a1, b1, c1, w1[2], RIPEMD320C40, RIPEMD320S4D);
+  RIPEMD320_STEP (RIPEMD320_J , c1, d1, e1, a1, b1, w3[3], RIPEMD320C40, RIPEMD320S4E);
+  RIPEMD320_STEP (RIPEMD320_J , b1, c1, d1, e1, a1, w3[1], RIPEMD320C40, RIPEMD320S4F);
+  // round 5, second line: RIPEMD320_F
+  RIPEMD320_STEP (RIPEMD320_F , b2, c2, d2, e2, a2, w3[0], RIPEMD320C90, RIPEMD320S90);
+  RIPEMD320_STEP (RIPEMD320_F , a2, b2, c2, d2, e2, w3[3], RIPEMD320C90, RIPEMD320S91);
+  RIPEMD320_STEP (RIPEMD320_F , e2, a2, b2, c2, d2, w2[2], RIPEMD320C90, RIPEMD320S92);
+  RIPEMD320_STEP (RIPEMD320_F , d2, e2, a2, b2, c2, w1[0], RIPEMD320C90, RIPEMD320S93);
+  RIPEMD320_STEP (RIPEMD320_F , c2, d2, e2, a2, b2, w0[1], RIPEMD320C90, RIPEMD320S94);
+  RIPEMD320_STEP (RIPEMD320_F , b2, c2, d2, e2, a2, w1[1], RIPEMD320C90, RIPEMD320S95);
+  RIPEMD320_STEP (RIPEMD320_F , a2, b2, c2, d2, e2, w2[0], RIPEMD320C90, RIPEMD320S96);
+  RIPEMD320_STEP (RIPEMD320_F , e2, a2, b2, c2, d2, w1[3], RIPEMD320C90, RIPEMD320S97);
+  RIPEMD320_STEP (RIPEMD320_F , d2, e2, a2, b2, c2, w1[2], RIPEMD320C90, RIPEMD320S98);
+  RIPEMD320_STEP (RIPEMD320_F , c2, d2, e2, a2, b2, w0[2], RIPEMD320C90, RIPEMD320S99);
+  RIPEMD320_STEP (RIPEMD320_F , b2, c2, d2, e2, a2, w3[1], RIPEMD320C90, RIPEMD320S9A);
+  RIPEMD320_STEP (RIPEMD320_F , a2, b2, c2, d2, e2, w3[2], RIPEMD320C90, RIPEMD320S9B);
+  RIPEMD320_STEP (RIPEMD320_F , e2, a2, b2, c2, d2, w0[0], RIPEMD320C90, RIPEMD320S9C);
+  RIPEMD320_STEP (RIPEMD320_F , d2, e2, a2, b2, c2, w0[3], RIPEMD320C90, RIPEMD320S9D);
+  RIPEMD320_STEP (RIPEMD320_F , c2, d2, e2, a2, b2, w2[1], RIPEMD320C90, RIPEMD320S9E);
+  RIPEMD320_STEP (RIPEMD320_F , b2, c2, d2, e2, a2, w2[3], RIPEMD320C90, RIPEMD320S9F);
+
+  tmp = e1; e1 = e2; e2 = tmp; // final exchange: the e registers of the two lines
+  // feed-forward: each working register is added onto the state word it was loaded from
+  const u32x a = digest[0] + a1;
+  const u32x b = digest[1] + b1;
+  const u32x c = digest[2] + c1;
+  const u32x d = digest[3] + d1;
+  const u32x e = digest[4] + e1;
+  const u32x f = digest[5] + a2;
+  const u32x g = digest[6] + b2;
+  const u32x h = digest[7] + c2;
+  const u32x i = digest[8] + d2;
+  const u32x l = digest[9] + e2;
+
+  digest[0] = a;
+  digest[1] = b;
+  digest[2] = c;
+  digest[3] = d;
+  digest[4] = e;
+  digest[5] = f;
+  digest[6] = g;
+  digest[7] = h;
+  digest[8] = i;
+  digest[9] = l;
+}
+
+DECLSPEC void ripemd320_init_vector (PRIVATE_AS ripemd320_ctx_vector_t *ctx)
+{ // Resets ctx to a fresh hash: initial state constants in h[0..9], empty 16-word buffer, zero length
+  ctx->h[0] = RIPEMD320M_A; // initial chaining values RIPEMD320M_A..RIPEMD320M_L (defined outside this view)
+  ctx->h[1] = RIPEMD320M_B;
+  ctx->h[2] = RIPEMD320M_C;
+  ctx->h[3] = RIPEMD320M_D;
+  ctx->h[4] = RIPEMD320M_E;
+  ctx->h[5] = RIPEMD320M_F;
+  ctx->h[6] = RIPEMD320M_G;
+  ctx->h[7] = RIPEMD320M_H;
+  ctx->h[8] = RIPEMD320M_I;
+  ctx->h[9] = RIPEMD320M_L;
+
+  ctx->w0[0] = 0; // pending-block buffer w0..w3 starts out all zero
+  ctx->w0[1] = 0;
+  ctx->w0[2] = 0;
+  ctx->w0[3] = 0;
+  ctx->w1[0] = 0;
+  ctx->w1[1] = 0;
+  ctx->w1[2] = 0;
+  ctx->w1[3] = 0;
+  ctx->w2[0] = 0;
+  ctx->w2[1] = 0;
+  ctx->w2[2] = 0;
+  ctx->w2[3] = 0;
+  ctx->w3[0] = 0;
+  ctx->w3[1] = 0;
+  ctx->w3[2] = 0;
+  ctx->w3[3] = 0;
+
+  ctx->len = 0; // no bytes absorbed yet
+}
+
+DECLSPEC void ripemd320_init_vector_from_scalar (PRIVATE_AS ripemd320_ctx_vector_t *ctx, PRIVATE_AS ripemd320_ctx_t *ctx0)
+{ // Initializes the vector context ctx as a field-by-field copy of the scalar context ctx0 (state, buffer and length)
+  ctx->h[0] = ctx0->h[0]; // 10 state words
+  ctx->h[1] = ctx0->h[1];
+  ctx->h[2] = ctx0->h[2];
+  ctx->h[3] = ctx0->h[3];
+  ctx->h[4] = ctx0->h[4];
+  ctx->h[5] = ctx0->h[5];
+  ctx->h[6] = ctx0->h[6];
+  ctx->h[7] = ctx0->h[7];
+  ctx->h[8] = ctx0->h[8];
+  ctx->h[9] = ctx0->h[9];
+
+  ctx->w0[0] = ctx0->w0[0]; // 16 pending buffer words, so a partially filled scalar block carries over
+  ctx->w0[1] = ctx0->w0[1];
+  ctx->w0[2] = ctx0->w0[2];
+  ctx->w0[3] = ctx0->w0[3];
+  ctx->w1[0] = ctx0->w1[0];
+  ctx->w1[1] = ctx0->w1[1];
+  ctx->w1[2] = ctx0->w1[2];
+  ctx->w1[3] = ctx0->w1[3];
+  ctx->w2[0] = ctx0->w2[0];
+  ctx->w2[1] = ctx0->w2[1];
+  ctx->w2[2] = ctx0->w2[2];
+  ctx->w2[3] = ctx0->w2[3];
+  ctx->w3[0] = ctx0->w3[0];
+  ctx->w3[1] = ctx0->w3[1];
+  ctx->w3[2] = ctx0->w3[2];
+  ctx->w3[3] = ctx0->w3[3];
+
+  ctx->len = ctx0->len; // byte count absorbed so far
+}
+
+DECLSPEC void ripemd320_update_vector_64 (PRIVATE_AS ripemd320_ctx_vector_t *ctx, PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, const int len)
+{ // Absorbs up to one 64-byte block (w0..w3) of len bytes into ctx, running the transform whenever the 64-byte buffer fills
+  if (len == 0) return;
+
+  const int pos = ctx->len & 63; // number of bytes already pending in ctx->w0..w3
+
+  ctx->len += len;
+
+  if (pos == 0)
+  { // buffer is empty: take the input words as they are
+    ctx->w0[0] = w0[0];
+    ctx->w0[1] = w0[1];
+    ctx->w0[2] = w0[2];
+    ctx->w0[3] = w0[3];
+    ctx->w1[0] = w1[0];
+    ctx->w1[1] = w1[1];
+    ctx->w1[2] = w1[2];
+    ctx->w1[3] = w1[3];
+    ctx->w2[0] = w2[0];
+    ctx->w2[1] = w2[1];
+    ctx->w2[2] = w2[2];
+    ctx->w2[3] = w2[3];
+    ctx->w3[0] = w3[0];
+    ctx->w3[1] = w3[1];
+    ctx->w3[2] = w3[2];
+    ctx->w3[3] = w3[3];
+
+    if (len == 64)
+    { // a complete block arrived: compress it right away and clear the buffer
+      ripemd320_transform_vector (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h);
+
+      ctx->w0[0] = 0;
+      ctx->w0[1] = 0;
+      ctx->w0[2] = 0;
+      ctx->w0[3] = 0;
+      ctx->w1[0] = 0;
+      ctx->w1[1] = 0;
+      ctx->w1[2] = 0;
+      ctx->w1[3] = 0;
+      ctx->w2[0] = 0;
+      ctx->w2[1] = 0;
+      ctx->w2[2] = 0;
+      ctx->w2[3] = 0;
+      ctx->w3[0] = 0;
+      ctx->w3[1] = 0;
+      ctx->w3[2] = 0;
+      ctx->w3[3] = 0;
+    }
+  }
+  else
+  {
+    if ((pos + len) < 64)
+    { // input still fits behind the pending bytes: merge it in, no transform yet
+      switch_buffer_by_offset_le (w0, w1, w2, w3, pos); // NOTE(review): presumably shifts the caller's w0..w3 by pos bytes in place - helper is defined outside this view
+
+      ctx->w0[0] |= w0[0];
+      ctx->w0[1] |= w0[1];
+      ctx->w0[2] |= w0[2];
+      ctx->w0[3] |= w0[3];
+      ctx->w1[0] |= w1[0];
+      ctx->w1[1] |= w1[1];
+      ctx->w1[2] |= w1[2];
+      ctx->w1[3] |= w1[3];
+      ctx->w2[0] |= w2[0];
+      ctx->w2[1] |= w2[1];
+      ctx->w2[2] |= w2[2];
+      ctx->w2[3] |= w2[3];
+      ctx->w3[0] |= w3[0];
+      ctx->w3[1] |= w3[1];
+      ctx->w3[2] |= w3[2];
+      ctx->w3[3] |= w3[3];
+    }
+    else
+    { // input fills (or overflows) the block: complete and compress it, then keep the overflow for the next block
+      u32x c0[4] = { 0 }; // carry words c0..c3 receive whatever spills past the 64-byte boundary
+      u32x c1[4] = { 0 };
+      u32x c2[4] = { 0 };
+      u32x c3[4] = { 0 };
+
+      switch_buffer_by_offset_carry_le (w0, w1, w2, w3, c0, c1, c2, c3, pos); // NOTE(review): presumably the shifting helper with overflow captured in c0..c3 - defined outside this view
+
+      ctx->w0[0] |= w0[0];
+      ctx->w0[1] |= w0[1];
+      ctx->w0[2] |= w0[2];
+      ctx->w0[3] |= w0[3];
+      ctx->w1[0] |= w1[0];
+      ctx->w1[1] |= w1[1];
+      ctx->w1[2] |= w1[2];
+      ctx->w1[3] |= w1[3];
+      ctx->w2[0] |= w2[0];
+      ctx->w2[1] |= w2[1];
+      ctx->w2[2] |= w2[2];
+      ctx->w2[3] |= w2[3];
+      ctx->w3[0] |= w3[0];
+      ctx->w3[1] |= w3[1];
+      ctx->w3[2] |= w3[2];
+      ctx->w3[3] |= w3[3];
+
+      ripemd320_transform_vector (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h);
+
+      ctx->w0[0] = c0[0]; // the carry becomes the new pending buffer
+      ctx->w0[1] = c0[1];
+      ctx->w0[2] = c0[2];
+      ctx->w0[3] = c0[3];
+      ctx->w1[0] = c1[0];
+      ctx->w1[1] = c1[1];
+      ctx->w1[2] = c1[2];
+      ctx->w1[3] = c1[3];
+      ctx->w2[0] = c2[0];
+      ctx->w2[1] = c2[1];
+      ctx->w2[2] = c2[2];
+      ctx->w2[3] = c2[3];
+      ctx->w3[0] = c3[0];
+      ctx->w3[1] = c3[1];
+      ctx->w3[2] = c3[2];
+      ctx->w3[3] = c3[3];
+    }
+  }
+}
+
+DECLSPEC void ripemd320_update_vector (PRIVATE_AS ripemd320_ctx_vector_t *ctx, PRIVATE_AS const u32x *w, const int len)
+{ // Absorbs len bytes from w into ctx, 16 words (64 bytes) per call to ripemd320_update_vector_64
+  u32x w0[4];
+  u32x w1[4];
+  u32x w2[4];
+  u32x w3[4];
+
+  int pos1; // byte offset into the input
+  int pos4; // matching word offset into w
+
+  for (pos1 = 0, pos4 = 0; pos1 < len - 64; pos1 += 64, pos4 += 16) // all chunks except the last one are full 64-byte blocks
+  {
+    w0[0] = w[pos4 +  0];
+    w0[1] = w[pos4 +  1];
+    w0[2] = w[pos4 +  2];
+    w0[3] = w[pos4 +  3];
+    w1[0] = w[pos4 +  4];
+    w1[1] = w[pos4 +  5];
+    w1[2] = w[pos4 +  6];
+    w1[3] = w[pos4 +  7];
+    w2[0] = w[pos4 +  8];
+    w2[1] = w[pos4 +  9];
+    w2[2] = w[pos4 + 10];
+    w2[3] = w[pos4 + 11];
+    w3[0] = w[pos4 + 12];
+    w3[1] = w[pos4 + 13];
+    w3[2] = w[pos4 + 14];
+    w3[3] = w[pos4 + 15];
+
+    ripemd320_update_vector_64 (ctx, w0, w1, w2, w3, 64);
+  }
+  // last chunk: all 16 words are loaded no matter how many bytes remain, so w must be readable that far
+  w0[0] = w[pos4 +  0];
+  w0[1] = w[pos4 +  1];
+  w0[2] = w[pos4 +  2];
+  w0[3] = w[pos4 +  3];
+  w1[0] = w[pos4 +  4];
+  w1[1] = w[pos4 +  5];
+  w1[2] = w[pos4 +  6];
+  w1[3] = w[pos4 +  7];
+  w2[0] = w[pos4 +  8];
+  w2[1] = w[pos4 +  9];
+  w2[2] = w[pos4 + 10];
+  w2[3] = w[pos4 + 11];
+  w3[0] = w[pos4 + 12];
+  w3[1] = w[pos4 + 13];
+  w3[2] = w[pos4 + 14];
+  w3[3] = w[pos4 + 15];
+
+  ripemd320_update_vector_64 (ctx, w0, w1, w2, w3, len - pos1); // remaining bytes, at most 64; a count of 0 is ignored by the callee
+}
+
+DECLSPEC void ripemd320_update_vector_swap (PRIVATE_AS ripemd320_ctx_vector_t *ctx, PRIVATE_AS const u32x *w, const int len)
+{ // Absorbs len bytes from w in 64-byte chunks, passing every word through hc_swap32 before it is hashed
+  u32x w0[4];
+  u32x w1[4];
+  u32x w2[4];
+  u32x w3[4];
+
+  int pos1; // byte offset into the input
+  int pos4; // matching word offset into w
+
+  for (pos1 = 0, pos4 = 0; pos1 < len - 64; pos1 += 64, pos4 += 16) // all chunks except the last one are full 64-byte blocks
+  {
+    w0[0] = w[pos4 +  0];
+    w0[1] = w[pos4 +  1];
+    w0[2] = w[pos4 +  2];
+    w0[3] = w[pos4 +  3];
+    w1[0] = w[pos4 +  4];
+    w1[1] = w[pos4 +  5];
+    w1[2] = w[pos4 +  6];
+    w1[3] = w[pos4 +  7];
+    w2[0] = w[pos4 +  8];
+    w2[1] = w[pos4 +  9];
+    w2[2] = w[pos4 + 10];
+    w2[3] = w[pos4 + 11];
+    w3[0] = w[pos4 + 12];
+    w3[1] = w[pos4 + 13];
+    w3[2] = w[pos4 + 14];
+    w3[3] = w[pos4 + 15];
+
+    w0[0] = hc_swap32 (w0[0]); // NOTE(review): hc_swap32 is presumably a per-word byte-order swap - defined outside this view
+    w0[1] = hc_swap32 (w0[1]);
+    w0[2] = hc_swap32 (w0[2]);
+    w0[3] = hc_swap32 (w0[3]);
+    w1[0] = hc_swap32 (w1[0]);
+    w1[1] = hc_swap32 (w1[1]);
+    w1[2] = hc_swap32 (w1[2]);
+    w1[3] = hc_swap32 (w1[3]);
+    w2[0] = hc_swap32 (w2[0]);
+    w2[1] = hc_swap32 (w2[1]);
+    w2[2] = hc_swap32 (w2[2]);
+    w2[3] = hc_swap32 (w2[3]);
+    w3[0] = hc_swap32 (w3[0]);
+    w3[1] = hc_swap32 (w3[1]);
+    w3[2] = hc_swap32 (w3[2]);
+    w3[3] = hc_swap32 (w3[3]);
+
+    ripemd320_update_vector_64 (ctx, w0, w1, w2, w3, 64);
+  }
+  // last chunk: all 16 words are loaded no matter how many bytes remain, so w must be readable that far
+  w0[0] = w[pos4 +  0];
+  w0[1] = w[pos4 +  1];
+  w0[2] = w[pos4 +  2];
+  w0[3] = w[pos4 +  3];
+  w1[0] = w[pos4 +  4];
+  w1[1] = w[pos4 +  5];
+  w1[2] = w[pos4 +  6];
+  w1[3] = w[pos4 +  7];
+  w2[0] = w[pos4 +  8];
+  w2[1] = w[pos4 +  9];
+  w2[2] = w[pos4 + 10];
+  w2[3] = w[pos4 + 11];
+  w3[0] = w[pos4 + 12];
+  w3[1] = w[pos4 + 13];
+  w3[2] = w[pos4 + 14];
+  w3[3] = w[pos4 + 15];
+
+  w0[0] = hc_swap32 (w0[0]);
+  w0[1] = hc_swap32 (w0[1]);
+  w0[2] = hc_swap32 (w0[2]);
+  w0[3] = hc_swap32 (w0[3]);
+  w1[0] = hc_swap32 (w1[0]);
+  w1[1] = hc_swap32 (w1[1]);
+  w1[2] = hc_swap32 (w1[2]);
+  w1[3] = hc_swap32 (w1[3]);
+  w2[0] = hc_swap32 (w2[0]);
+  w2[1] = hc_swap32 (w2[1]);
+  w2[2] = hc_swap32 (w2[2]);
+  w2[3] = hc_swap32 (w2[3]);
+  w3[0] = hc_swap32 (w3[0]);
+  w3[1] = hc_swap32 (w3[1]);
+  w3[2] = hc_swap32 (w3[2]);
+  w3[3] = hc_swap32 (w3[3]);
+
+  ripemd320_update_vector_64 (ctx, w0, w1, w2, w3, len - pos1); // remaining bytes, at most 64; a count of 0 is ignored by the callee
+}
+
+DECLSPEC void ripemd320_update_vector_utf16le (PRIVATE_AS ripemd320_ctx_vector_t *ctx, PRIVATE_AS const u32x *w, const int len)
+{ // Absorbs len input bytes from w after widening them with make_utf16le: 8 input words (32 bytes) become one 64-byte block
+  u32x w0[4];
+  u32x w1[4];
+  u32x w2[4];
+  u32x w3[4];
+
+  int pos1; // byte offset into the (unexpanded) input
+  int pos4; // matching word offset into w
+
+  for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8) // all chunks except the last one are full 32-byte inputs
+  {
+    w0[0] = w[pos4 + 0];
+    w0[1] = w[pos4 + 1];
+    w0[2] = w[pos4 + 2];
+    w0[3] = w[pos4 + 3];
+    w1[0] = w[pos4 + 4];
+    w1[1] = w[pos4 + 5];
+    w1[2] = w[pos4 + 6];
+    w1[3] = w[pos4 + 7];
+
+    make_utf16le (w1, w2, w3); // NOTE(review): presumably (in, out_lo, out_hi); w1 goes first because the next call reuses w1 as an output - confirm
+    make_utf16le (w0, w0, w1);
+
+    ripemd320_update_vector_64 (ctx, w0, w1, w2, w3, 32 * 2);
+  }
+  // last chunk: all 8 words are loaded no matter how many bytes remain, so w must be readable that far
+  w0[0] = w[pos4 + 0];
+  w0[1] = w[pos4 + 1];
+  w0[2] = w[pos4 + 2];
+  w0[3] = w[pos4 + 3];
+  w1[0] = w[pos4 + 4];
+  w1[1] = w[pos4 + 5];
+  w1[2] = w[pos4 + 6];
+  w1[3] = w[pos4 + 7];
+
+  make_utf16le (w1, w2, w3);
+  make_utf16le (w0, w0, w1);
+
+  ripemd320_update_vector_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2); // byte count doubles along with the data
+}
+
+DECLSPEC void ripemd320_update_vector_utf16le_swap (PRIVATE_AS ripemd320_ctx_vector_t *ctx, PRIVATE_AS const u32x *w, const int len)
+{ // Absorbs len input bytes from w: each 32-byte chunk is widened with make_utf16le, then every word goes through hc_swap32
+  u32x w0[4];
+  u32x w1[4];
+  u32x w2[4];
+  u32x w3[4];
+
+  int pos1; // byte offset into the (unexpanded) input
+  int pos4; // matching word offset into w
+
+  for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8) // all chunks except the last one are full 32-byte inputs
+  {
+    w0[0] = w[pos4 + 0];
+    w0[1] = w[pos4 + 1];
+    w0[2] = w[pos4 + 2];
+    w0[3] = w[pos4 + 3];
+    w1[0] = w[pos4 + 4];
+    w1[1] = w[pos4 + 5];
+    w1[2] = w[pos4 + 6];
+    w1[3] = w[pos4 + 7];
+
+    make_utf16le (w1, w2, w3); // NOTE(review): presumably (in, out_lo, out_hi); w1 goes first because the next call reuses w1 as an output - confirm
+    make_utf16le (w0, w0, w1);
+
+    w0[0] = hc_swap32 (w0[0]); // swap is applied after the widening, on all 16 expanded words
+    w0[1] = hc_swap32 (w0[1]);
+    w0[2] = hc_swap32 (w0[2]);
+    w0[3] = hc_swap32 (w0[3]);
+    w1[0] = hc_swap32 (w1[0]);
+    w1[1] = hc_swap32 (w1[1]);
+    w1[2] = hc_swap32 (w1[2]);
+    w1[3] = hc_swap32 (w1[3]);
+    w2[0] = hc_swap32 (w2[0]);
+    w2[1] = hc_swap32 (w2[1]);
+    w2[2] = hc_swap32 (w2[2]);
+    w2[3] = hc_swap32 (w2[3]);
+    w3[0] = hc_swap32 (w3[0]);
+    w3[1] = hc_swap32 (w3[1]);
+    w3[2] = hc_swap32 (w3[2]);
+    w3[3] = hc_swap32 (w3[3]);
+
+    ripemd320_update_vector_64 (ctx, w0, w1, w2, w3, 32 * 2);
+  }
+  // last chunk: all 8 words are loaded no matter how many bytes remain, so w must be readable that far
+  w0[0] = w[pos4 + 0];
+  w0[1] = w[pos4 + 1];
+  w0[2] = w[pos4 + 2];
+  w0[3] = w[pos4 + 3];
+  w1[0] = w[pos4 + 4];
+  w1[1] = w[pos4 + 5];
+  w1[2] = w[pos4 + 6];
+  w1[3] = w[pos4 + 7];
+
+  make_utf16le (w1, w2, w3);
+  make_utf16le (w0, w0, w1);
+
+  w0[0] = hc_swap32 (w0[0]);
+  w0[1] = hc_swap32 (w0[1]);
+  w0[2] = hc_swap32 (w0[2]);
+  w0[3] = hc_swap32 (w0[3]);
+  w1[0] = hc_swap32 (w1[0]);
+  w1[1] = hc_swap32 (w1[1]);
+  w1[2] = hc_swap32 (w1[2]);
+  w1[3] = hc_swap32 (w1[3]);
+  w2[0] = hc_swap32 (w2[0]);
+  w2[1] = hc_swap32 (w2[1]);
+  w2[2] = hc_swap32 (w2[2]);
+  w2[3] = hc_swap32 (w2[3]);
+  w3[0] = hc_swap32 (w3[0]);
+  w3[1] = hc_swap32 (w3[1]);
+  w3[2] = hc_swap32 (w3[2]);
+  w3[3] = hc_swap32 (w3[3]);
+
+  ripemd320_update_vector_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2); // byte count doubles along with the data
+}
+
+DECLSPEC void ripemd320_final_vector (PRIVATE_AS ripemd320_ctx_vector_t *ctx)
+{ // Finishes the hash: pads the pending block, stores the message bit length and runs the last transform(s); result is in ctx->h
+  const int pos = ctx->len & 63; // bytes pending in the buffer
+
+  append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos); // NOTE(review): presumably places the 0x80 padding byte at offset pos - helper is defined outside this view
+
+  if (pos >= 56)
+  { // no room left for the length in w3[2..3] (bytes 56..63): flush this block and continue with an empty one
+    ripemd320_transform_vector (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h);
+
+    ctx->w0[0] = 0;
+    ctx->w0[1] = 0;
+    ctx->w0[2] = 0;
+    ctx->w0[3] = 0;
+    ctx->w1[0] = 0;
+    ctx->w1[1] = 0;
+    ctx->w1[2] = 0;
+    ctx->w1[3] = 0;
+    ctx->w2[0] = 0;
+    ctx->w2[1] = 0;
+    ctx->w2[2] = 0;
+    ctx->w2[3] = 0;
+    ctx->w3[0] = 0;
+    ctx->w3[1] = 0;
+    ctx->w3[2] = 0;
+    ctx->w3[3] = 0;
+  }
+
+  ctx->w3[2] = ctx->len * 8; // message length in bits, low word
+  ctx->w3[3] = 0; // high word of the bit length is always written as zero
+
+  ripemd320_transform_vector (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h);
+}
+
+// HMAC + Vector
+
+DECLSPEC void ripemd320_hmac_init_vector_64 (PRIVATE_AS ripemd320_hmac_ctx_vector_t *ctx, PRIVATE_AS const u32x *w0, PRIVATE_AS const u32x *w1, PRIVATE_AS const u32x *w2, PRIVATE_AS const u32x *w3)
+{ // Keys the HMAC from a 64-byte key block w0..w3: ctx->ipad absorbs key ^ 0x36.., ctx->opad absorbs key ^ 0x5c..
+  u32x a0[4];
+  u32x a1[4];
+  u32x a2[4];
+  u32x a3[4];
+
+  // ipad
+
+  a0[0] = w0[0] ^ 0x36363636;
+  a0[1] = w0[1] ^ 0x36363636;
+  a0[2] = w0[2] ^ 0x36363636;
+  a0[3] = w0[3] ^ 0x36363636;
+  a1[0] = w1[0] ^ 0x36363636;
+  a1[1] = w1[1] ^ 0x36363636;
+  a1[2] = w1[2] ^ 0x36363636;
+  a1[3] = w1[3] ^ 0x36363636;
+  a2[0] = w2[0] ^ 0x36363636;
+  a2[1] = w2[1] ^ 0x36363636;
+  a2[2] = w2[2] ^ 0x36363636;
+  a2[3] = w2[3] ^ 0x36363636;
+  a3[0] = w3[0] ^ 0x36363636;
+  a3[1] = w3[1] ^ 0x36363636;
+  a3[2] = w3[2] ^ 0x36363636;
+  a3[3] = w3[3] ^ 0x36363636;
+
+  ripemd320_init_vector (&ctx->ipad);
+
+  ripemd320_update_vector_64 (&ctx->ipad, a0, a1, a2, a3, 64); // a full block at offset 0: compressed immediately, buffer left empty
+
+  // opad
+
+  u32x b0[4];
+  u32x b1[4];
+  u32x b2[4];
+  u32x b3[4];
+
+  b0[0] = w0[0] ^ 0x5c5c5c5c;
+  b0[1] = w0[1] ^ 0x5c5c5c5c;
+  b0[2] = w0[2] ^ 0x5c5c5c5c;
+  b0[3] = w0[3] ^ 0x5c5c5c5c;
+  b1[0] = w1[0] ^ 0x5c5c5c5c;
+  b1[1] = w1[1] ^ 0x5c5c5c5c;
+  b1[2] = w1[2] ^ 0x5c5c5c5c;
+  b1[3] = w1[3] ^ 0x5c5c5c5c;
+  b2[0] = w2[0] ^ 0x5c5c5c5c;
+  b2[1] = w2[1] ^ 0x5c5c5c5c;
+  b2[2] = w2[2] ^ 0x5c5c5c5c;
+  b2[3] = w2[3] ^ 0x5c5c5c5c;
+  b3[0] = w3[0] ^ 0x5c5c5c5c;
+  b3[1] = w3[1] ^ 0x5c5c5c5c;
+  b3[2] = w3[2] ^ 0x5c5c5c5c;
+  b3[3] = w3[3] ^ 0x5c5c5c5c;
+
+  ripemd320_init_vector (&ctx->opad);
+
+  ripemd320_update_vector_64 (&ctx->opad, b0, b1, b2, b3, 64); // same for the outer hash: state advanced by one block, buffer empty
+}
+
+DECLSPEC void ripemd320_hmac_init_vector (PRIVATE_AS ripemd320_hmac_ctx_vector_t *ctx, PRIVATE_AS const u32x *w, const int len)
+{ // Keys the HMAC from a key of len bytes in w: keys longer than 64 bytes are hashed first, others are used as given
+  u32x w0[4];
+  u32x w1[4];
+  u32x w2[4];
+  u32x w3[4];
+
+  if (len > 64)
+  { // long key: replace it by its 10-word RIPEMD-320 digest, zero-extended to the 16-word key block
+    ripemd320_ctx_vector_t tmp;
+
+    ripemd320_init_vector (&tmp);
+
+    ripemd320_update_vector (&tmp, w, len);
+
+    ripemd320_final_vector (&tmp);
+
+    w0[0] = tmp.h[0];
+    w0[1] = tmp.h[1];
+    w0[2] = tmp.h[2];
+    w0[3] = tmp.h[3];
+    w1[0] = tmp.h[4];
+    w1[1] = tmp.h[5];
+    w1[2] = tmp.h[6];
+    w1[3] = tmp.h[7];
+    w2[0] = tmp.h[8];
+    w2[1] = tmp.h[9];
+    w2[2] = 0;
+    w2[3] = 0;
+    w3[0] = 0;
+    w3[1] = 0;
+    w3[2] = 0;
+    w3[3] = 0;
+  }
+  else
+  { // NOTE(review): all 16 words of w are read regardless of len, so shorter keys are presumably zero-padded by the caller - confirm
+    w0[0] = w[ 0];
+    w0[1] = w[ 1];
+    w0[2] = w[ 2];
+    w0[3] = w[ 3];
+    w1[0] = w[ 4];
+    w1[1] = w[ 5];
+    w1[2] = w[ 6];
+    w1[3] = w[ 7];
+    w2[0] = w[ 8];
+    w2[1] = w[ 9];
+    w2[2] = w[10];
+    w2[3] = w[11];
+    w3[0] = w[12];
+    w3[1] = w[13];
+    w3[2] = w[14];
+    w3[3] = w[15];
+  }
+
+  ripemd320_hmac_init_vector_64 (ctx, w0, w1, w2, w3);
+}
+
+DECLSPEC void ripemd320_hmac_update_vector_64 (PRIVATE_AS ripemd320_hmac_ctx_vector_t *ctx, PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, const int len)
+{ // HMAC message update: hands the w0..w3 block and len to the inner (ipad) hash; ctx->opad is not touched here
+  ripemd320_update_vector_64 (&ctx->ipad, w0, w1, w2, w3, len);
+}
+
+DECLSPEC void ripemd320_hmac_update_vector (PRIVATE_AS ripemd320_hmac_ctx_vector_t *ctx, PRIVATE_AS const u32x *w, const int len)
+{ // HMAC message update: forwards w/len to ripemd320_update_vector on the inner (ipad) hash only
+  ripemd320_update_vector (&ctx->ipad, w, len);
+}
+
+DECLSPEC void ripemd320_hmac_final_vector (PRIVATE_AS ripemd320_hmac_ctx_vector_t *ctx)
+{ // HMAC finish: finalizes the inner (ipad) hash, feeds its 10 state words to the outer (opad) hash and finalizes that; result is in ctx->opad.h
+  ripemd320_final_vector (&ctx->ipad);
+
+  ctx->opad.w0[0] = ctx->ipad.h[0]; // inner state h[0..9] becomes the pending message words of the outer hash
+  ctx->opad.w0[1] = ctx->ipad.h[1]; // NOTE(review): the direct overwrite presumes opad's buffer is empty, as it is right after ripemd320_hmac_init_vector_64
+  ctx->opad.w0[2] = ctx->ipad.h[2];
+  ctx->opad.w0[3] = ctx->ipad.h[3];
+  ctx->opad.w1[0] = ctx->ipad.h[4];
+  ctx->opad.w1[1] = ctx->ipad.h[5];
+  ctx->opad.w1[2] = ctx->ipad.h[6];
+  ctx->opad.w1[3] = ctx->ipad.h[7];
+  ctx->opad.w2[0] = ctx->ipad.h[8];
+  ctx->opad.w2[1] = ctx->ipad.h[9];
+  ctx->opad.w2[2] = 0; // the remaining 6 of the 16 buffer words are cleared
+  ctx->opad.w2[3] = 0;
+  ctx->opad.w3[0] = 0;
+  ctx->opad.w3[1] = 0;
+  ctx->opad.w3[2] = 0;
+  ctx->opad.w3[3] = 0;
+
+  ctx->opad.len += 40; // accounts for the 10 words * 4 bytes stored above
+
+  ripemd320_final_vector (&ctx->opad);
+}
diff --git a/OpenCL/inc_hash_ripemd320.h b/OpenCL/inc_hash_ripemd320.h
new file mode 100644
index 000000000..e9737bb94
--- /dev/null
+++ b/OpenCL/inc_hash_ripemd320.h
@@ -0,0 +1,147 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#ifndef INC_HASH_RIPEMD320_H
+#define INC_HASH_RIPEMD320_H
+
+#define RIPEMD320_F(x,y,z)    ((x) ^ (y) ^ (z)) /* bitwise x XOR y XOR z */
+#define RIPEMD320_G(x,y,z)    ((z) ^ ((x) & ((y) ^ (z)))) /* bitwise select: x ? y : z */
+#define RIPEMD320_H(x,y,z)    (((x) | ~(y)) ^ (z)) /* (x OR NOT y) XOR z */
+#define RIPEMD320_I(x,y,z)    ((y) ^ ((z) & ((x) ^ (y)))) /* bitwise select: z ? x : y */
+#define RIPEMD320_J(x,y,z)    ((x) ^ ((y) | ~(z))) /* x XOR (y OR NOT z) */
+
+#ifdef USE_BITSELECT
+#define RIPEMD320_Go(x,y,z)   (bitselect ((z), (y), (x))) /* RIPEMD320_G expressed with the bitselect built-in */
+#define RIPEMD320_Io(x,y,z)   (bitselect ((y), (x), (z))) /* RIPEMD320_I expressed with the bitselect built-in */
+#else
+#define RIPEMD320_Go(x,y,z)   (RIPEMD320_G ((x), (y), (z))) /* no bitselect: fall back to the plain formula */
+#define RIPEMD320_Io(x,y,z)   (RIPEMD320_I ((x), (y), (z))) /* no bitselect: fall back to the plain formula */
+#endif
+
+#define RIPEMD320_STEP_S(f,a,b,c,d,e,x,K,s) \
+{                                           \
+  a += K;                                   \
+  a += x;                                   \
+  a += f (b, c, d);                         \
+  a  = hc_rotl32_S (a, s);                  \
+  a += e;                                   \
+  c  = hc_rotl32_S (c, 10u);                \
+} /* one step: a = rotl (a + K + x + f (b, c, d), s) + e, then c = rotl (c, 10); a and c are modified in place, uses the _S rotate */
+
+#define RIPEMD320_STEP(f,a,b,c,d,e,x,K,s) \
+{                                         \
+  a += make_u32x (K);                     \
+  a += x;                                 \
+  a += f (b, c, d);                       \
+  a  = hc_rotl32 (a, s);                  \
+  a += e;                                 \
+  c  = hc_rotl32 (c, 10u);                \
+} /* same step as RIPEMD320_STEP_S, but K goes through make_u32x and the rotate is hc_rotl32 (presumably the u32x vector path) */
+
+#define ROTATE_LEFT_WORKAROUND_BUG(a,n) (((a) << (n)) | ((a) >> (32 - (n)))) /* 32-bit rotate-left spelled out with shifts instead of hc_rotl32; expects 0 < n < 32; arguments are parenthesized so expression arguments expand safely (the bug being worked around is not described here) */
+
+#define RIPEMD320_STEP_S_WORKAROUND_BUG(f,a,b,c,d,e,x,K,s)  \
+{                                           \
+  a += K;                                   \
+  a += x;                                   \
+  a += f (b, c, d);                         \
+  a  = ROTATE_LEFT_WORKAROUND_BUG (a, s);   \
+  a += e;                                   \
+  c  = hc_rotl32_S (c, 10u);                \
+} /* RIPEMD320_STEP_S with only the rotate of a replaced by the shift-based ROTATE_LEFT_WORKAROUND_BUG; c still uses hc_rotl32_S */
+
+#define RIPEMD320_STEP_WORKAROUND_BUG(f,a,b,c,d,e,x,K,s)  \
+{                                         \
+  a += make_u32x (K);                     \
+  a += x;                                 \
+  a += f (b, c, d);                       \
+  a  = ROTATE_LEFT_WORKAROUND_BUG (a, s); \
+  a += e;                                 \
+  c  = hc_rotl32 (c, 10u);                \
+} /* RIPEMD320_STEP with only the rotate of a replaced by the shift-based ROTATE_LEFT_WORKAROUND_BUG; c still uses hc_rotl32 */
+
+typedef struct ripemd320_ctx // scalar RIPEMD-320 hashing context
+{
+  u32 h[10]; // chaining state / digest: ten words
+
+  u32 w0[4]; // w0..w3 together form the 16-word block buffer, four words each
+  u32 w1[4];
+  u32 w2[4];
+  u32 w3[4];
+
+  int len; // byte counter; NOTE(review): presumably total bytes absorbed so far - confirm in inc_hash_ripemd320.cl
+
+} ripemd320_ctx_t;
+
+typedef struct ripemd320_hmac_ctx // scalar HMAC-RIPEMD-320 state: a pair of hash contexts
+{
+  ripemd320_ctx_t ipad; // inner hash context (the message data is fed here)
+  ripemd320_ctx_t opad; // outer hash context (NOTE(review): presumably fed the inner digest at final, as in the vector variant - confirm)
+
+} ripemd320_hmac_ctx_t;
+
+typedef struct ripemd320_ctx_vector // u32x counterpart of ripemd320_ctx_t
+{
+  u32x h[10]; // chaining state / digest: ten u32x words
+
+  u32x w0[4]; // w0..w3 together form the 16-word block buffer, four words each
+  u32x w1[4];
+  u32x w2[4];
+  u32x w3[4];
+
+  int  len; // byte counter; a single plain int, not a u32x
+
+} ripemd320_ctx_vector_t;
+
+typedef struct ripemd320_hmac_ctx_vector // u32x HMAC-RIPEMD-320 state: a pair of hash contexts
+{
+  ripemd320_ctx_vector_t ipad; // inner hash context: ripemd320_hmac_update_vector* feed data here
+  ripemd320_ctx_vector_t opad; // outer hash context: ripemd320_hmac_final_vector feeds it the inner digest
+
+} ripemd320_hmac_ctx_vector_t;
+
+DECLSPEC void ripemd320_transform (PRIVATE_AS const u32 *w0, PRIVATE_AS const u32 *w1, PRIVATE_AS const u32 *w2, PRIVATE_AS const u32 *w3, PRIVATE_AS u32 *digest); // --- scalar (u32) hash API starts here
+DECLSPEC void ripemd320_init (PRIVATE_AS ripemd320_ctx_t *ctx);
+DECLSPEC void ripemd320_update_64 (PRIVATE_AS ripemd320_ctx_t *ctx, PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const int len);
+DECLSPEC void ripemd320_update (PRIVATE_AS ripemd320_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len);
+DECLSPEC void ripemd320_update_swap (PRIVATE_AS ripemd320_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len);
+DECLSPEC void ripemd320_update_utf16le (PRIVATE_AS ripemd320_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len);
+DECLSPEC void ripemd320_update_utf16le_swap (PRIVATE_AS ripemd320_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len);
+DECLSPEC void ripemd320_update_global (PRIVATE_AS ripemd320_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len);
+DECLSPEC void ripemd320_update_global_swap (PRIVATE_AS ripemd320_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len);
+DECLSPEC void ripemd320_update_global_utf16le (PRIVATE_AS ripemd320_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len);
+DECLSPEC void ripemd320_update_global_utf16le_swap (PRIVATE_AS ripemd320_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len);
+DECLSPEC void ripemd320_final (PRIVATE_AS ripemd320_ctx_t *ctx);
+DECLSPEC void ripemd320_hmac_init_64 (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, PRIVATE_AS const u32 *w0, PRIVATE_AS const u32 *w1, PRIVATE_AS const u32 *w2, PRIVATE_AS const u32 *w3); // --- scalar HMAC API starts here
+DECLSPEC void ripemd320_hmac_init (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len);
+DECLSPEC void ripemd320_hmac_init_swap (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len);
+DECLSPEC void ripemd320_hmac_init_global (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len);
+DECLSPEC void ripemd320_hmac_init_global_swap (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len);
+DECLSPEC void ripemd320_hmac_update_64 (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const int len);
+DECLSPEC void ripemd320_hmac_update (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len);
+DECLSPEC void ripemd320_hmac_update_swap (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len);
+DECLSPEC void ripemd320_hmac_update_utf16le (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len);
+DECLSPEC void ripemd320_hmac_update_utf16le_swap (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len);
+DECLSPEC void ripemd320_hmac_update_global (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len);
+DECLSPEC void ripemd320_hmac_update_global_swap (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len);
+DECLSPEC void ripemd320_hmac_update_global_utf16le (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len);
+DECLSPEC void ripemd320_hmac_update_global_utf16le_swap (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len);
+DECLSPEC void ripemd320_hmac_final (PRIVATE_AS ripemd320_hmac_ctx_t *ctx);
+DECLSPEC void ripemd320_transform_vector (PRIVATE_AS const u32x *w0, PRIVATE_AS const u32x *w1, PRIVATE_AS const u32x *w2, PRIVATE_AS const u32x *w3, PRIVATE_AS u32x *digest); // --- vector (u32x) hash API starts here
+DECLSPEC void ripemd320_init_vector (PRIVATE_AS ripemd320_ctx_vector_t *ctx);
+DECLSPEC void ripemd320_init_vector_from_scalar (PRIVATE_AS ripemd320_ctx_vector_t *ctx, PRIVATE_AS ripemd320_ctx_t *ctx0);
+DECLSPEC void ripemd320_update_vector_64 (PRIVATE_AS ripemd320_ctx_vector_t *ctx, PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, const int len);
+DECLSPEC void ripemd320_update_vector (PRIVATE_AS ripemd320_ctx_vector_t *ctx, PRIVATE_AS const u32x *w, const int len);
+DECLSPEC void ripemd320_update_vector_swap (PRIVATE_AS ripemd320_ctx_vector_t *ctx, PRIVATE_AS const u32x *w, const int len);
+DECLSPEC void ripemd320_update_vector_utf16le (PRIVATE_AS ripemd320_ctx_vector_t *ctx, PRIVATE_AS const u32x *w, const int len);
+DECLSPEC void ripemd320_update_vector_utf16le_swap (PRIVATE_AS ripemd320_ctx_vector_t *ctx, PRIVATE_AS const u32x *w, const int len);
+DECLSPEC void ripemd320_final_vector (PRIVATE_AS ripemd320_ctx_vector_t *ctx);
+DECLSPEC void ripemd320_hmac_init_vector_64 (PRIVATE_AS ripemd320_hmac_ctx_vector_t *ctx, PRIVATE_AS const u32x *w0, PRIVATE_AS const u32x *w1, PRIVATE_AS const u32x *w2, PRIVATE_AS const u32x *w3); // --- vector HMAC API starts here
+DECLSPEC void ripemd320_hmac_init_vector (PRIVATE_AS ripemd320_hmac_ctx_vector_t *ctx, PRIVATE_AS const u32x *w, const int len);
+DECLSPEC void ripemd320_hmac_update_vector_64 (PRIVATE_AS ripemd320_hmac_ctx_vector_t *ctx, PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, const int len);
+DECLSPEC void ripemd320_hmac_update_vector (PRIVATE_AS ripemd320_hmac_ctx_vector_t *ctx, PRIVATE_AS const u32x *w, const int len);
+DECLSPEC void ripemd320_hmac_final_vector (PRIVATE_AS ripemd320_hmac_ctx_vector_t *ctx);
+
+#endif // INC_HASH_RIPEMD320_H
diff --git a/OpenCL/inc_types.h b/OpenCL/inc_types.h
index 4d7b937d5..233e28958 100644
--- a/OpenCL/inc_types.h
+++ b/OpenCL/inc_types.h
@@ -1565,6 +1565,202 @@ typedef enum ripemd160_constants
 
 } ripemd160_constants_t;
 
+typedef enum ripemd320_constants
+{
+  RIPEMD320M_A=0x67452301U, // RIPEMD320M_A..L: the ten initial chaining values (the m33600 kernels load them into dgst[0..9])
+  RIPEMD320M_B=0xefcdab89U,
+  RIPEMD320M_C=0x98badcfeU,
+  RIPEMD320M_D=0x10325476U,
+  RIPEMD320M_E=0xc3d2e1f0U,
+  RIPEMD320M_F=0x76543210U,
+  RIPEMD320M_G=0xfedcba98U,
+  RIPEMD320M_H=0x89abcdefU,
+  RIPEMD320M_I=0x01234567U,
+  RIPEMD320M_L=0x3c2d1e0fU,
+
+  RIPEMD320C00=0x00000000U, // RIPEMD320C00..C90: presumably the additive constants passed as K to RIPEMD320_STEP* - usage is not visible here, confirm
+  RIPEMD320C10=0x5a827999U,
+  RIPEMD320C20=0x6ed9eba1U,
+  RIPEMD320C30=0x8f1bbcdcU,
+  RIPEMD320C40=0xa953fd4eU,
+  RIPEMD320C50=0x50a28be6U,
+  RIPEMD320C60=0x5c4dd124U,
+  RIPEMD320C70=0x6d703ef3U,
+  RIPEMD320C80=0x7a6d76e9U,
+  RIPEMD320C90=0x00000000U,
+
+  RIPEMD320S00=11, // RIPEMD320Sxy: presumably the rotate amount (s argument of RIPEMD320_STEP*) for step y (0..F) of group x (0..9) - confirm against inc_hash_ripemd320.cl
+  RIPEMD320S01=14,
+  RIPEMD320S02=15,
+  RIPEMD320S03=12,
+  RIPEMD320S04=5,
+  RIPEMD320S05=8,
+  RIPEMD320S06=7,
+  RIPEMD320S07=9,
+  RIPEMD320S08=11,
+  RIPEMD320S09=13,
+  RIPEMD320S0A=14,
+  RIPEMD320S0B=15,
+  RIPEMD320S0C=6,
+  RIPEMD320S0D=7,
+  RIPEMD320S0E=9,
+  RIPEMD320S0F=8,
+
+  RIPEMD320S10=7,
+  RIPEMD320S11=6,
+  RIPEMD320S12=8,
+  RIPEMD320S13=13,
+  RIPEMD320S14=11,
+  RIPEMD320S15=9,
+  RIPEMD320S16=7,
+  RIPEMD320S17=15,
+  RIPEMD320S18=7,
+  RIPEMD320S19=12,
+  RIPEMD320S1A=15,
+  RIPEMD320S1B=9,
+  RIPEMD320S1C=11,
+  RIPEMD320S1D=7,
+  RIPEMD320S1E=13,
+  RIPEMD320S1F=12,
+
+  RIPEMD320S20=11,
+  RIPEMD320S21=13,
+  RIPEMD320S22=6,
+  RIPEMD320S23=7,
+  RIPEMD320S24=14,
+  RIPEMD320S25=9,
+  RIPEMD320S26=13,
+  RIPEMD320S27=15,
+  RIPEMD320S28=14,
+  RIPEMD320S29=8,
+  RIPEMD320S2A=13,
+  RIPEMD320S2B=6,
+  RIPEMD320S2C=5,
+  RIPEMD320S2D=12,
+  RIPEMD320S2E=7,
+  RIPEMD320S2F=5,
+
+  RIPEMD320S30=11,
+  RIPEMD320S31=12,
+  RIPEMD320S32=14,
+  RIPEMD320S33=15,
+  RIPEMD320S34=14,
+  RIPEMD320S35=15,
+  RIPEMD320S36=9,
+  RIPEMD320S37=8,
+  RIPEMD320S38=9,
+  RIPEMD320S39=14,
+  RIPEMD320S3A=5,
+  RIPEMD320S3B=6,
+  RIPEMD320S3C=8,
+  RIPEMD320S3D=6,
+  RIPEMD320S3E=5,
+  RIPEMD320S3F=12,
+
+  RIPEMD320S40=9,
+  RIPEMD320S41=15,
+  RIPEMD320S42=5,
+  RIPEMD320S43=11,
+  RIPEMD320S44=6,
+  RIPEMD320S45=8,
+  RIPEMD320S46=13,
+  RIPEMD320S47=12,
+  RIPEMD320S48=5,
+  RIPEMD320S49=12,
+  RIPEMD320S4A=13,
+  RIPEMD320S4B=14,
+  RIPEMD320S4C=11,
+  RIPEMD320S4D=8,
+  RIPEMD320S4E=5,
+  RIPEMD320S4F=6,
+
+  RIPEMD320S50=8,
+  RIPEMD320S51=9,
+  RIPEMD320S52=9,
+  RIPEMD320S53=11,
+  RIPEMD320S54=13,
+  RIPEMD320S55=15,
+  RIPEMD320S56=15,
+  RIPEMD320S57=5,
+  RIPEMD320S58=7,
+  RIPEMD320S59=7,
+  RIPEMD320S5A=8,
+  RIPEMD320S5B=11,
+  RIPEMD320S5C=14,
+  RIPEMD320S5D=14,
+  RIPEMD320S5E=12,
+  RIPEMD320S5F=6,
+
+  RIPEMD320S60=9,
+  RIPEMD320S61=13,
+  RIPEMD320S62=15,
+  RIPEMD320S63=7,
+  RIPEMD320S64=12,
+  RIPEMD320S65=8,
+  RIPEMD320S66=9,
+  RIPEMD320S67=11,
+  RIPEMD320S68=7,
+  RIPEMD320S69=7,
+  RIPEMD320S6A=12,
+  RIPEMD320S6B=7,
+  RIPEMD320S6C=6,
+  RIPEMD320S6D=15,
+  RIPEMD320S6E=13,
+  RIPEMD320S6F=11,
+
+  RIPEMD320S70=9,
+  RIPEMD320S71=7,
+  RIPEMD320S72=15,
+  RIPEMD320S73=11,
+  RIPEMD320S74=8,
+  RIPEMD320S75=6,
+  RIPEMD320S76=6,
+  RIPEMD320S77=14,
+  RIPEMD320S78=12,
+  RIPEMD320S79=13,
+  RIPEMD320S7A=5,
+  RIPEMD320S7B=14,
+  RIPEMD320S7C=13,
+  RIPEMD320S7D=13,
+  RIPEMD320S7E=7,
+  RIPEMD320S7F=5,
+
+  RIPEMD320S80=15,
+  RIPEMD320S81=5,
+  RIPEMD320S82=8,
+  RIPEMD320S83=11,
+  RIPEMD320S84=14,
+  RIPEMD320S85=14,
+  RIPEMD320S86=6,
+  RIPEMD320S87=14,
+  RIPEMD320S88=6,
+  RIPEMD320S89=9,
+  RIPEMD320S8A=12,
+  RIPEMD320S8B=9,
+  RIPEMD320S8C=12,
+  RIPEMD320S8D=5,
+  RIPEMD320S8E=15,
+  RIPEMD320S8F=8,
+
+  RIPEMD320S90=8,
+  RIPEMD320S91=5,
+  RIPEMD320S92=12,
+  RIPEMD320S93=9,
+  RIPEMD320S94=12,
+  RIPEMD320S95=5,
+  RIPEMD320S96=14,
+  RIPEMD320S97=6,
+  RIPEMD320S98=8,
+  RIPEMD320S99=13,
+  RIPEMD320S9A=6,
+  RIPEMD320S9B=5,
+  RIPEMD320S9C=15,
+  RIPEMD320S9D=13,
+  RIPEMD320S9E=11,
+  RIPEMD320S9F=11
+
+} ripemd320_constants_t;
+
 typedef enum keccak_constants
 {
   KECCAK_RNDC_00=0x0000000000000001UL,
diff --git a/OpenCL/m33600_a0-optimized.cl b/OpenCL/m33600_a0-optimized.cl
new file mode 100644
index 000000000..8e120caac
--- /dev/null
+++ b/OpenCL/m33600_a0-optimized.cl
@@ -0,0 +1,225 @@
+
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include M2S(INCLUDE_PATH/inc_vendor.h)
+#include M2S(INCLUDE_PATH/inc_types.h)
+#include M2S(INCLUDE_PATH/inc_platform.cl)
+#include M2S(INCLUDE_PATH/inc_common.cl)
+#include M2S(INCLUDE_PATH/inc_rp_optimized.h)
+#include M2S(INCLUDE_PATH/inc_rp_optimized.cl)
+#include M2S(INCLUDE_PATH/inc_simd.cl)
+#include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
+#endif
+
+DECLSPEC void ripemd320_transform_transport_vector (PRIVATE_AS const u32x *w, PRIVATE_AS u32x *dgst)
+{ // adapter: present one flat 16-word block as the four 4-word groups ripemd320_transform_vector takes
+  ripemd320_transform_vector (&w[0], &w[4], &w[8], &w[12], dgst);
+}
+
+KERNEL_FQ void m33600_m04 (KERN_ATTR_RULES ())
+{
+  /**
+   * modifier
+   */
+
+  const u64 lid = get_local_id (0); // not referenced in the visible body; NOTE(review): may be consumed by a macro - confirm before removing
+
+  /**
+   * base: this work-item's base word (first 8 words only) and its length
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return; // work-items at or past GID_CNT have nothing to do
+
+  u32 pw_buf0[4];
+  u32 pw_buf1[4];
+
+  pw_buf0[0] = pws[gid].i[0];
+  pw_buf0[1] = pws[gid].i[1];
+  pw_buf0[2] = pws[gid].i[2];
+  pw_buf0[3] = pws[gid].i[3];
+  pw_buf1[0] = pws[gid].i[4];
+  pw_buf1[1] = pws[gid].i[5];
+  pw_buf1[2] = pws[gid].i[6];
+  pw_buf1[3] = pws[gid].i[7];
+
+  const u32 pw_len = pws[gid].pw_len & 63; // length masked into 0..63
+
+  /**
+   * loop over the rules in rules_buf, VECT_SIZE at a time
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+  {
+    u32x w0[4] = { 0 };
+    u32x w1[4] = { 0 };
+    u32x w2[4] = { 0 }; // w2 and w3 are declared but never read in this kernel
+    u32x w3[4] = { 0 };
+
+    const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); // rule output lands in w0/w1; the call returns its length
+
+    append_0x80_2x4_VV (w0, w1, out_len); // presumably writes the 0x80 padding byte right after the candidate - helper not visible here
+
+    u32x w[16];
+
+    w[ 0] = w0[0];
+    w[ 1] = w0[1];
+    w[ 2] = w0[2];
+    w[ 3] = w0[3];
+    w[ 4] = w1[0];
+    w[ 5] = w1[1];
+    w[ 6] = w1[2];
+    w[ 7] = w1[3];
+    w[ 8] = 0; // the candidate occupies w[0..7] only; the rest of the single block is zero
+    w[ 9] = 0;
+    w[10] = 0;
+    w[11] = 0;
+    w[12] = 0;
+    w[13] = 0;
+    w[14] = out_len * 8; // message length in bits
+    w[15] = 0;
+
+    /**
+     * RipeMD320: start from the initial values and run one transform over the single block
+     */
+
+    u32x dgst[10];
+
+    dgst[0] = RIPEMD320M_A;
+    dgst[1] = RIPEMD320M_B;
+    dgst[2] = RIPEMD320M_C;
+    dgst[3] = RIPEMD320M_D;
+    dgst[4] = RIPEMD320M_E;
+    dgst[5] = RIPEMD320M_F;
+    dgst[6] = RIPEMD320M_G;
+    dgst[7] = RIPEMD320M_H;
+    dgst[8] = RIPEMD320M_I;
+    dgst[9] = RIPEMD320M_L;
+
+    ripemd320_transform_transport_vector (w, dgst);
+
+    COMPARE_M_SIMD (dgst[0], dgst[1], dgst[2], dgst[3]); // only the first four of the ten digest words are handed to the compare macro
+  }
+}
+
+KERNEL_FQ void m33600_m08 (KERN_ATTR_RULES ())
+{ // empty stub: does no work; among the m* entry points of this file only m33600_m04 has a body
+}
+
+KERNEL_FQ void m33600_m16 (KERN_ATTR_RULES ())
+{ // empty stub: does no work; among the m* entry points of this file only m33600_m04 has a body
+}
+
+KERNEL_FQ void m33600_s04 (KERN_ATTR_RULES ())
+{
+  /**
+   * modifier
+   */
+
+  const u64 lid = get_local_id (0); // not referenced in the visible body; NOTE(review): may be consumed by a macro - confirm before removing
+
+  /**
+   * base: this work-item's base word (first 8 words only) and its length
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return; // work-items at or past GID_CNT have nothing to do
+
+  u32 pw_buf0[4];
+  u32 pw_buf1[4];
+
+  pw_buf0[0] = pws[gid].i[0];
+  pw_buf0[1] = pws[gid].i[1];
+  pw_buf0[2] = pws[gid].i[2];
+  pw_buf0[3] = pws[gid].i[3];
+  pw_buf1[0] = pws[gid].i[4];
+  pw_buf1[1] = pws[gid].i[5];
+  pw_buf1[2] = pws[gid].i[6];
+  pw_buf1[3] = pws[gid].i[7];
+
+  const u32 pw_len = pws[gid].pw_len & 63; // length masked into 0..63
+
+  /**
+   * digest: the four words of the target at DIGESTS_OFFSET_HOST (NOTE(review): search is presumably read by COMPARE_S_SIMD - confirm)
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
+  };
+
+  /**
+   * loop over the rules in rules_buf, VECT_SIZE at a time
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+  {
+    u32x w0[4] = { 0 };
+    u32x w1[4] = { 0 };
+    u32x w2[4] = { 0 }; // w2 and w3 are declared but never read in this kernel
+    u32x w3[4] = { 0 };
+
+    const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1); // rule output lands in w0/w1; the call returns its length
+
+    append_0x80_2x4_VV (w0, w1, out_len); // presumably writes the 0x80 padding byte right after the candidate - helper not visible here
+
+    u32x w[16];
+
+    w[ 0] = w0[0];
+    w[ 1] = w0[1];
+    w[ 2] = w0[2];
+    w[ 3] = w0[3];
+    w[ 4] = w1[0];
+    w[ 5] = w1[1];
+    w[ 6] = w1[2];
+    w[ 7] = w1[3];
+    w[ 8] = 0; // the candidate occupies w[0..7] only; the rest of the single block is zero
+    w[ 9] = 0;
+    w[10] = 0;
+    w[11] = 0;
+    w[12] = 0;
+    w[13] = 0;
+    w[14] = out_len * 8; // message length in bits
+    w[15] = 0;
+
+    /**
+     * RipeMD320: start from the initial values and run one transform over the single block
+     */
+
+    u32x dgst[10];
+
+    dgst[0] = RIPEMD320M_A;
+    dgst[1] = RIPEMD320M_B;
+    dgst[2] = RIPEMD320M_C;
+    dgst[3] = RIPEMD320M_D;
+    dgst[4] = RIPEMD320M_E;
+    dgst[5] = RIPEMD320M_F;
+    dgst[6] = RIPEMD320M_G;
+    dgst[7] = RIPEMD320M_H;
+    dgst[8] = RIPEMD320M_I;
+    dgst[9] = RIPEMD320M_L;
+
+    ripemd320_transform_transport_vector (w, dgst);
+
+    COMPARE_S_SIMD (dgst[0], dgst[1], dgst[2], dgst[3]); // only the first four of the ten digest words are handed to the compare macro
+  }
+}
+
+KERNEL_FQ void m33600_s08 (KERN_ATTR_RULES ())
+{ // empty stub: does no work; among the s* entry points of this file only m33600_s04 has a body
+}
+
+KERNEL_FQ void m33600_s16 (KERN_ATTR_RULES ())
+{ // empty stub: does no work; among the s* entry points of this file only m33600_s04 has a body
+}
diff --git a/OpenCL/m33600_a0-pure.cl b/OpenCL/m33600_a0-pure.cl
new file mode 100644
index 000000000..9839edc58
--- /dev/null
+++ b/OpenCL/m33600_a0-pure.cl
@@ -0,0 +1,118 @@
+
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+//#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include M2S(INCLUDE_PATH/inc_vendor.h)
+#include M2S(INCLUDE_PATH/inc_types.h)
+#include M2S(INCLUDE_PATH/inc_platform.cl)
+#include M2S(INCLUDE_PATH/inc_common.cl)
+#include M2S(INCLUDE_PATH/inc_rp.h)
+#include M2S(INCLUDE_PATH/inc_rp.cl)
+#include M2S(INCLUDE_PATH/inc_scalar.cl)
+#include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
+#endif
+
+KERNEL_FQ void m33600_mxx (KERN_ATTR_RULES ())
+{
+  /**
+   * modifier
+   */
+
+  const u64 lid = get_local_id (0); // not referenced in the visible body; NOTE(review): may be consumed by a macro - confirm before removing
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return; // work-items at or past GID_CNT have nothing to do
+
+  /**
+   * base: capture this work-item's base word (COPY_PW is defined elsewhere; presumably pairs with PASTE_PW below)
+   */
+
+  COPY_PW (pws[gid]);
+
+  /**
+   * loop: one rule from rules_buf per iteration
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  {
+    pw_t tmp = PASTE_PW; // fresh working copy for every rule
+
+    tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); // rule il_pos is applied to tmp.i; the returned value becomes the new length
+
+    ripemd320_ctx_t ctx;
+
+    ripemd320_init (&ctx);
+
+    ripemd320_update (&ctx, tmp.i, tmp.pw_len);
+
+    ripemd320_final (&ctx);
+
+    const u32 r0 = ctx.h[DGST_R0]; // the four digest words selected by DGST_R0..DGST_R3 are the ones compared
+    const u32 r1 = ctx.h[DGST_R1];
+    const u32 r2 = ctx.h[DGST_R2];
+    const u32 r3 = ctx.h[DGST_R3];
+
+    COMPARE_M_SCALAR (r0, r1, r2, r3);
+  }
+}
+
+KERNEL_FQ void m33600_sxx (KERN_ATTR_RULES ())
+{
+  /**
+   * modifier
+   */
+
+  const u64 lid = get_local_id (0); // not referenced in the visible body; NOTE(review): may be consumed by a macro - confirm before removing
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return; // work-items at or past GID_CNT have nothing to do
+
+  /**
+   * digest: the four words of the target at DIGESTS_OFFSET_HOST (NOTE(review): search is presumably read by COMPARE_S_SCALAR - confirm)
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
+  };
+
+  /**
+   * base: capture this work-item's base word (COPY_PW is defined elsewhere; presumably pairs with PASTE_PW below)
+   */
+
+  COPY_PW (pws[gid]);
+
+  /**
+   * loop: one rule from rules_buf per iteration
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  {
+    pw_t tmp = PASTE_PW; // fresh working copy for every rule
+
+    tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); // rule il_pos is applied to tmp.i; the returned value becomes the new length
+
+    ripemd320_ctx_t ctx;
+
+    ripemd320_init (&ctx);
+
+    ripemd320_update (&ctx, tmp.i, tmp.pw_len);
+
+    ripemd320_final (&ctx);
+
+    const u32 r0 = ctx.h[DGST_R0]; // the four digest words selected by DGST_R0..DGST_R3 are the ones compared
+    const u32 r1 = ctx.h[DGST_R1];
+    const u32 r2 = ctx.h[DGST_R2];
+    const u32 r3 = ctx.h[DGST_R3];
+
+    COMPARE_S_SCALAR (r0, r1, r2, r3);
+  }
+}
diff --git a/OpenCL/m33600_a1-optimized.cl b/OpenCL/m33600_a1-optimized.cl
new file mode 100644
index 000000000..efa719980
--- /dev/null
+++ b/OpenCL/m33600_a1-optimized.cl
@@ -0,0 +1,339 @@
+
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include M2S(INCLUDE_PATH/inc_vendor.h)
+#include M2S(INCLUDE_PATH/inc_types.h)
+#include M2S(INCLUDE_PATH/inc_platform.cl)
+#include M2S(INCLUDE_PATH/inc_common.cl)
+#include M2S(INCLUDE_PATH/inc_simd.cl)
+#include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
+#endif
+
+DECLSPEC void ripemd320_transform_transport_vector (PRIVATE_AS const u32x *w, PRIVATE_AS u32x *dgst)
+{ // adapter: present one flat 16-word block as the four 4-word groups ripemd320_transform_vector takes
+  ripemd320_transform_vector (&w[0], &w[4], &w[8], &w[12], dgst);
+}
+
+KERNEL_FQ void m33600_m04 (KERN_ATTR_BASIC ())
+{
+  /**
+   * modifier
+   */
+
+  const u64 lid = get_local_id (0); // not referenced in the visible body; NOTE(review): may be consumed by a macro - confirm before removing
+
+  /**
+   * base: this work-item's word from pws (first 8 words only) and its length
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return; // work-items at or past GID_CNT have nothing to do
+
+  u32 pw_buf0[4];
+  u32 pw_buf1[4];
+
+  pw_buf0[0] = pws[gid].i[0];
+  pw_buf0[1] = pws[gid].i[1];
+  pw_buf0[2] = pws[gid].i[2];
+  pw_buf0[3] = pws[gid].i[3];
+  pw_buf1[0] = pws[gid].i[4];
+  pw_buf1[1] = pws[gid].i[5];
+  pw_buf1[2] = pws[gid].i[6];
+  pw_buf1[3] = pws[gid].i[7];
+
+  const u32 pw_l_len = pws[gid].pw_len & 63; // length of the pws word, masked into 0..63
+
+  /**
+   * loop over the words in combs_buf, VECT_SIZE at a time
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+  {
+    const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; // length(s) of the combs_buf word(s), masked into 0..63
+
+    const u32x pw_len = (pw_l_len + pw_r_len) & 63; // combined length, masked into 0..63 again
+
+    /**
+     * concat password candidate: wordl* holds the pws word, wordr* the combs_buf word
+     */
+
+    u32x wordl0[4] = { 0 };
+    u32x wordl1[4] = { 0 };
+    u32x wordl2[4] = { 0 };
+    u32x wordl3[4] = { 0 };
+
+    wordl0[0] = pw_buf0[0];
+    wordl0[1] = pw_buf0[1];
+    wordl0[2] = pw_buf0[2];
+    wordl0[3] = pw_buf0[3];
+    wordl1[0] = pw_buf1[0];
+    wordl1[1] = pw_buf1[1];
+    wordl1[2] = pw_buf1[2];
+    wordl1[3] = pw_buf1[3];
+
+    u32x wordr0[4] = { 0 };
+    u32x wordr1[4] = { 0 };
+    u32x wordr2[4] = { 0 };
+    u32x wordr3[4] = { 0 };
+
+    wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
+    wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
+    wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
+    wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
+    wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
+    wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
+    wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
+    wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
+
+    if (COMBS_MODE == COMBINATOR_MODE_BASE_LEFT)
+    {
+      switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); // pws word first: the combs_buf word is presumably shifted up by pw_l_len bytes - helper not visible here
+    }
+    else
+    {
+      switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); // combs_buf word first: the pws word is presumably shifted up by pw_r_len bytes
+    }
+
+    u32x w0[4];
+    u32x w1[4];
+    u32x w2[4];
+    u32x w3[4];
+
+    w0[0] = wordl0[0] | wordr0[0]; // the two buffers are merged word by word with OR
+    w0[1] = wordl0[1] | wordr0[1];
+    w0[2] = wordl0[2] | wordr0[2];
+    w0[3] = wordl0[3] | wordr0[3];
+    w1[0] = wordl1[0] | wordr1[0];
+    w1[1] = wordl1[1] | wordr1[1];
+    w1[2] = wordl1[2] | wordr1[2];
+    w1[3] = wordl1[3] | wordr1[3];
+    w2[0] = wordl2[0] | wordr2[0];
+    w2[1] = wordl2[1] | wordr2[1];
+    w2[2] = wordl2[2] | wordr2[2];
+    w2[3] = wordl2[3] | wordr2[3];
+    w3[0] = wordl3[0] | wordr3[0];
+    w3[1] = wordl3[1] | wordr3[1];
+    w3[2] = wordl3[2] | wordr3[2];
+    w3[3] = wordl3[3] | wordr3[3];
+
+    /**
+     * RipeMD320 (NOTE(review): no 0x80 padding byte is appended in this kernel - presumably already present in the input; confirm)
+     */
+
+    u32x w[16];
+
+    w[ 0] = w0[0];
+    w[ 1] = w0[1];
+    w[ 2] = w0[2];
+    w[ 3] = w0[3];
+    w[ 4] = w1[0];
+    w[ 5] = w1[1];
+    w[ 6] = w1[2];
+    w[ 7] = w1[3];
+    w[ 8] = w2[0];
+    w[ 9] = w2[1];
+    w[10] = w2[2];
+    w[11] = w2[3];
+    w[12] = w3[0];
+    w[13] = w3[1];
+    w[14] = pw_len * 8; // message length in bits; w3[2] and w3[3] are not copied into w[14]/w[15]
+    w[15] = 0;
+
+    u32x dgst[10];
+
+    dgst[0] = RIPEMD320M_A;
+    dgst[1] = RIPEMD320M_B;
+    dgst[2] = RIPEMD320M_C;
+    dgst[3] = RIPEMD320M_D;
+    dgst[4] = RIPEMD320M_E;
+    dgst[5] = RIPEMD320M_F;
+    dgst[6] = RIPEMD320M_G;
+    dgst[7] = RIPEMD320M_H;
+    dgst[8] = RIPEMD320M_I;
+    dgst[9] = RIPEMD320M_L;
+
+    ripemd320_transform_transport_vector (w, dgst);
+
+    COMPARE_M_SIMD (dgst[0], dgst[1], dgst[2], dgst[3]); // only the first four of the ten digest words are handed to the compare macro
+  }
+}
+
+KERNEL_FQ void m33600_m08 (KERN_ATTR_BASIC ())
+{ // empty stub: does no work; among the m* entry points of this file only m33600_m04 has a body
+}
+
+KERNEL_FQ void m33600_m16 (KERN_ATTR_BASIC ())
+{ // empty stub: does no work; among the m* entry points of this file only m33600_m04 has a body
+}
+
+KERNEL_FQ void m33600_s04 (KERN_ATTR_BASIC ())
+{
+  /**
+   * modifier
+   */
+
+  const u64 lid = get_local_id (0); // not referenced in the visible body; NOTE(review): may be consumed by a macro - confirm before removing
+
+  /**
+   * base: this work-item's word from pws (first 8 words only) and its length
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return; // work-items at or past GID_CNT have nothing to do
+
+  u32 pw_buf0[4];
+  u32 pw_buf1[4];
+
+  pw_buf0[0] = pws[gid].i[0];
+  pw_buf0[1] = pws[gid].i[1];
+  pw_buf0[2] = pws[gid].i[2];
+  pw_buf0[3] = pws[gid].i[3];
+  pw_buf1[0] = pws[gid].i[4];
+  pw_buf1[1] = pws[gid].i[5];
+  pw_buf1[2] = pws[gid].i[6];
+  pw_buf1[3] = pws[gid].i[7];
+
+  const u32 pw_l_len = pws[gid].pw_len & 63; // length of the pws word, masked into 0..63
+
+  /**
+   * digest: the four words of the target at DIGESTS_OFFSET_HOST (NOTE(review): search is presumably read by COMPARE_S_SIMD - confirm)
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
+  };
+
+  /**
+   * loop over the words in combs_buf, VECT_SIZE at a time
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+  {
+    const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63; // length(s) of the combs_buf word(s), masked into 0..63
+
+    const u32x pw_len = (pw_l_len + pw_r_len) & 63; // combined length, masked into 0..63 again
+
+    /**
+     * concat password candidate: wordl* holds the pws word, wordr* the combs_buf word
+     */
+
+    u32x wordl0[4] = { 0 };
+    u32x wordl1[4] = { 0 };
+    u32x wordl2[4] = { 0 };
+    u32x wordl3[4] = { 0 };
+
+    wordl0[0] = pw_buf0[0];
+    wordl0[1] = pw_buf0[1];
+    wordl0[2] = pw_buf0[2];
+    wordl0[3] = pw_buf0[3];
+    wordl1[0] = pw_buf1[0];
+    wordl1[1] = pw_buf1[1];
+    wordl1[2] = pw_buf1[2];
+    wordl1[3] = pw_buf1[3];
+
+    u32x wordr0[4] = { 0 };
+    u32x wordr1[4] = { 0 };
+    u32x wordr2[4] = { 0 };
+    u32x wordr3[4] = { 0 };
+
+    wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
+    wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
+    wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
+    wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
+    wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
+    wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
+    wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
+    wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
+
+    if (COMBS_MODE == COMBINATOR_MODE_BASE_LEFT)
+    {
+      switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); // pws word first: the combs_buf word is presumably shifted up by pw_l_len bytes - helper not visible here
+    }
+    else
+    {
+      switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); // combs_buf word first: the pws word is presumably shifted up by pw_r_len bytes
+    }
+
+    u32x w0[4];
+    u32x w1[4];
+    u32x w2[4];
+    u32x w3[4];
+
+    w0[0] = wordl0[0] | wordr0[0]; // the two buffers are merged word by word with OR
+    w0[1] = wordl0[1] | wordr0[1];
+    w0[2] = wordl0[2] | wordr0[2];
+    w0[3] = wordl0[3] | wordr0[3];
+    w1[0] = wordl1[0] | wordr1[0];
+    w1[1] = wordl1[1] | wordr1[1];
+    w1[2] = wordl1[2] | wordr1[2];
+    w1[3] = wordl1[3] | wordr1[3];
+    w2[0] = wordl2[0] | wordr2[0];
+    w2[1] = wordl2[1] | wordr2[1];
+    w2[2] = wordl2[2] | wordr2[2];
+    w2[3] = wordl2[3] | wordr2[3];
+    w3[0] = wordl3[0] | wordr3[0];
+    w3[1] = wordl3[1] | wordr3[1];
+    w3[2] = wordl3[2] | wordr3[2];
+    w3[3] = wordl3[3] | wordr3[3];
+
+    /**
+     * RipeMD320 (NOTE(review): no 0x80 padding byte is appended in this kernel - presumably already present in the input; confirm)
+     */
+
+    u32x w[16];
+
+    w[ 0] = w0[0];
+    w[ 1] = w0[1];
+    w[ 2] = w0[2];
+    w[ 3] = w0[3];
+    w[ 4] = w1[0];
+    w[ 5] = w1[1];
+    w[ 6] = w1[2];
+    w[ 7] = w1[3];
+    w[ 8] = w2[0];
+    w[ 9] = w2[1];
+    w[10] = w2[2];
+    w[11] = w2[3];
+    w[12] = w3[0];
+    w[13] = w3[1];
+    w[14] = pw_len * 8; // message length in bits; w3[2] and w3[3] are not copied into w[14]/w[15]
+    w[15] = 0;
+
+    u32x dgst[10];
+
+    dgst[0] = RIPEMD320M_A;
+    dgst[1] = RIPEMD320M_B;
+    dgst[2] = RIPEMD320M_C;
+    dgst[3] = RIPEMD320M_D;
+    dgst[4] = RIPEMD320M_E;
+    dgst[5] = RIPEMD320M_F;
+    dgst[6] = RIPEMD320M_G;
+    dgst[7] = RIPEMD320M_H;
+    dgst[8] = RIPEMD320M_I;
+    dgst[9] = RIPEMD320M_L;
+
+    ripemd320_transform_transport_vector (w, dgst);
+
+    COMPARE_S_SIMD (dgst[0], dgst[1], dgst[2], dgst[3]); // only the first four of the ten digest words are handed to the compare macro
+  }
+}
+
+KERNEL_FQ void m33600_s08 (KERN_ATTR_BASIC ())
+{ // empty stub: does no work; among the s* entry points of this file only m33600_s04 has a body
+}
+
+KERNEL_FQ void m33600_s16 (KERN_ATTR_BASIC ())
+{ // empty stub: does no work; among the s* entry points of this file only m33600_s04 has a body
+}
diff --git a/OpenCL/m33600_a1-pure.cl b/OpenCL/m33600_a1-pure.cl
new file mode 100644
index 000000000..a97881806
--- /dev/null
+++ b/OpenCL/m33600_a1-pure.cl
@@ -0,0 +1,112 @@
+
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+//#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include M2S(INCLUDE_PATH/inc_vendor.h)
+#include M2S(INCLUDE_PATH/inc_types.h)
+#include M2S(INCLUDE_PATH/inc_platform.cl)
+#include M2S(INCLUDE_PATH/inc_common.cl)
+#include M2S(INCLUDE_PATH/inc_scalar.cl)
+#include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
+#endif
+
+KERNEL_FQ void m33600_mxx (KERN_ATTR_BASIC ())
+{
+  /**
+   * modifier
+   */
+
+  const u64 lid = get_local_id (0); // not referenced in the visible body; NOTE(review): may be consumed by a macro - confirm before removing
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return; // work-items at or past GID_CNT have nothing to do
+
+  /**
+   * base: hash this work-item's pws word once, outside the loop
+   */
+
+  ripemd320_ctx_t ctx0;
+
+  ripemd320_init (&ctx0);
+
+  ripemd320_update_global (&ctx0, pws[gid].i, pws[gid].pw_len);
+
+  /**
+   * loop: one word from combs_buf per iteration
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  {
+    ripemd320_ctx_t ctx = ctx0; // start from the saved context so the pws word is not re-hashed
+
+    ripemd320_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); // the combs_buf word is always absorbed after the pws word here
+
+    ripemd320_final (&ctx);
+
+    const u32 r0 = ctx.h[DGST_R0]; // the four digest words selected by DGST_R0..DGST_R3 are the ones compared
+    const u32 r1 = ctx.h[DGST_R1];
+    const u32 r2 = ctx.h[DGST_R2];
+    const u32 r3 = ctx.h[DGST_R3];
+
+    COMPARE_M_SCALAR (r0, r1, r2, r3);
+  }
+}
+
+KERNEL_FQ void m33600_sxx (KERN_ATTR_BASIC ())
+{
+  /**
+   * modifier
+   */
+
+  const u64 lid = get_local_id (0); // not referenced in the visible body; NOTE(review): may be consumed by a macro - confirm before removing
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return; // work-items at or past GID_CNT have nothing to do
+
+  /**
+   * digest: the four words of the target at DIGESTS_OFFSET_HOST (NOTE(review): search is presumably read by COMPARE_S_SCALAR - confirm)
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
+  };
+
+  /**
+   * base: hash this work-item's pws word once, outside the loop
+   */
+
+  ripemd320_ctx_t ctx0;
+
+  ripemd320_init (&ctx0);
+
+  ripemd320_update_global (&ctx0, pws[gid].i, pws[gid].pw_len);
+
+  /**
+   * loop: one word from combs_buf per iteration
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  {
+    ripemd320_ctx_t ctx = ctx0; // start from the saved context so the pws word is not re-hashed
+
+    ripemd320_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); // the combs_buf word is always absorbed after the pws word here
+
+    ripemd320_final (&ctx);
+
+    const u32 r0 = ctx.h[DGST_R0]; // the four digest words selected by DGST_R0..DGST_R3 are the ones compared
+    const u32 r1 = ctx.h[DGST_R1];
+    const u32 r2 = ctx.h[DGST_R2];
+    const u32 r3 = ctx.h[DGST_R3];
+
+    COMPARE_S_SCALAR (r0, r1, r2, r3);
+  }
+}
diff --git a/OpenCL/m33600_a3-optimized.cl b/OpenCL/m33600_a3-optimized.cl
new file mode 100644
index 000000000..f55c1a36f
--- /dev/null
+++ b/OpenCL/m33600_a3-optimized.cl
@@ -0,0 +1,447 @@
+
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include M2S(INCLUDE_PATH/inc_vendor.h)
+#include M2S(INCLUDE_PATH/inc_types.h)
+#include M2S(INCLUDE_PATH/inc_platform.cl)
+#include M2S(INCLUDE_PATH/inc_common.cl)
+#include M2S(INCLUDE_PATH/inc_simd.cl)
+#include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
+#endif
+
+DECLSPEC void ripemd320_transform_transport_vector (PRIVATE_AS const u32x *w, PRIVATE_AS u32x *dgst)
+{
+  ripemd320_transform_vector (w + 0, w + 4, w + 8, w + 12, dgst); // w is passed as four consecutive 4-word groups; dgst is handed through as-is
+}
+
+DECLSPEC void m33600m (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const u32 pw_len, KERN_ATTR_FUNC_BASIC ())
+{
+  /**
+   * modifiers are taken from args: w0..w3 carry the prepared block words, pw_len the length used for w[14]
+   */
+
+  /**
+   * loop: VECT_SIZE candidates per iteration; only w[0] varies (w0[0] OR'ed with the ix_create_bft value)
+   */
+
+  u32 w0l = w0[0];
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+  {
+    const u32x w0r = ix_create_bft (bfs_buf, il_pos);
+
+    const u32x w0lr = w0l | w0r;
+
+    u32x w[16];
+
+    w[ 0] = w0lr;
+    w[ 1] = w0[1];
+    w[ 2] = w0[2];
+    w[ 3] = w0[3];
+    w[ 4] = w1[0];
+    w[ 5] = w1[1];
+    w[ 6] = w1[2];
+    w[ 7] = w1[3];
+    w[ 8] = w2[0];
+    w[ 9] = w2[1];
+    w[10] = w2[2];
+    w[11] = w2[3];
+    w[12] = w3[0];
+    w[13] = w3[1];
+    w[14] = pw_len * 8;
+    w[15] = 0;
+
+    /**
+     * RipeMD320: seed dgst[0..9] with RIPEMD320M_A..RIPEMD320M_L, then run exactly one transform over w
+     */
+
+    u32x dgst[10];
+
+    dgst[0] = RIPEMD320M_A;
+    dgst[1] = RIPEMD320M_B;
+    dgst[2] = RIPEMD320M_C;
+    dgst[3] = RIPEMD320M_D;
+    dgst[4] = RIPEMD320M_E;
+    dgst[5] = RIPEMD320M_F;
+    dgst[6] = RIPEMD320M_G;
+    dgst[7] = RIPEMD320M_H;
+    dgst[8] = RIPEMD320M_I;
+    dgst[9] = RIPEMD320M_L;
+
+    ripemd320_transform_transport_vector (w, dgst);
+
+    COMPARE_M_SIMD (dgst[0], dgst[1], dgst[2], dgst[3]);
+  }
+}
+
+DECLSPEC void m33600s (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const u32 pw_len, KERN_ATTR_FUNC_BASIC ())
+{
+  /**
+   * modifiers are taken from args: w0..w3 carry the prepared block words, pw_len the length used for w[14]
+   */
+
+  /**
+   * digest: DGST_R0..DGST_R3 words of the digest at DIGESTS_OFFSET_HOST (presumably read by COMPARE_S_SIMD - confirm)
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
+  };
+
+  /**
+   * loop: VECT_SIZE candidates per iteration; only w[0] varies (w0[0] OR'ed with the ix_create_bft value)
+   */
+
+  u32 w0l = w0[0];
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+  {
+    const u32x w0r = ix_create_bft (bfs_buf, il_pos);
+
+    const u32x w0lr = w0l | w0r;
+
+    u32x w[16];
+
+    w[ 0] = w0lr;
+    w[ 1] = w0[1];
+    w[ 2] = w0[2];
+    w[ 3] = w0[3];
+    w[ 4] = w1[0];
+    w[ 5] = w1[1];
+    w[ 6] = w1[2];
+    w[ 7] = w1[3];
+    w[ 8] = w2[0];
+    w[ 9] = w2[1];
+    w[10] = w2[2];
+    w[11] = w2[3];
+    w[12] = w3[0];
+    w[13] = w3[1];
+    w[14] = pw_len * 8;
+    w[15] = 0;
+
+    /**
+     * RipeMD320: seed dgst[0..9] with RIPEMD320M_A..RIPEMD320M_L, then run exactly one transform over w
+     */
+
+    u32x dgst[10];
+
+    dgst[0] = RIPEMD320M_A;
+    dgst[1] = RIPEMD320M_B;
+    dgst[2] = RIPEMD320M_C;
+    dgst[3] = RIPEMD320M_D;
+    dgst[4] = RIPEMD320M_E;
+    dgst[5] = RIPEMD320M_F;
+    dgst[6] = RIPEMD320M_G;
+    dgst[7] = RIPEMD320M_H;
+    dgst[8] = RIPEMD320M_I;
+    dgst[9] = RIPEMD320M_L;
+
+    ripemd320_transform_transport_vector (w, dgst);
+
+    COMPARE_S_SIMD (dgst[0], dgst[1], dgst[2], dgst[3]);
+  }
+}
+
+KERNEL_FQ void m33600_m04 (KERN_ATTR_BASIC ())
+{
+  /**
+   * base: load pws[gid].i[0..3] into w0; w1, w2 and w3 stay zero; pw_len is masked with 63
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+  const u64 lsz = get_local_size (0);
+
+  if (gid >= GID_CNT) return;
+
+  u32 w0[4];
+
+  w0[0] = pws[gid].i[ 0];
+  w0[1] = pws[gid].i[ 1];
+  w0[2] = pws[gid].i[ 2];
+  w0[3] = pws[gid].i[ 3];
+
+  u32 w1[4];
+
+  w1[0] = 0;
+  w1[1] = 0;
+  w1[2] = 0;
+  w1[3] = 0;
+
+  u32 w2[4];
+
+  w2[0] = 0;
+  w2[1] = 0;
+  w2[2] = 0;
+  w2[3] = 0;
+
+  u32 w3[4];
+
+  w3[0] = 0;
+  w3[1] = 0;
+  w3[2] = 0;
+  w3[3] = 0;
+
+  const u32 pw_len = pws[gid].pw_len & 63;
+
+  /**
+   * main: forward the block words, pw_len, every kernel argument and gid/lid/lsz to m33600m
+   */
+
+  m33600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
+}
+
+KERNEL_FQ void m33600_m08 (KERN_ATTR_BASIC ())
+{
+  /**
+   * base: load pws[gid].i[0..7] into w0 and w1; w2 and w3 stay zero; pw_len is masked with 63
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+  const u64 lsz = get_local_size (0);
+
+  if (gid >= GID_CNT) return;
+
+  u32 w0[4];
+
+  w0[0] = pws[gid].i[ 0];
+  w0[1] = pws[gid].i[ 1];
+  w0[2] = pws[gid].i[ 2];
+  w0[3] = pws[gid].i[ 3];
+
+  u32 w1[4];
+
+  w1[0] = pws[gid].i[ 4];
+  w1[1] = pws[gid].i[ 5];
+  w1[2] = pws[gid].i[ 6];
+  w1[3] = pws[gid].i[ 7];
+
+  u32 w2[4];
+
+  w2[0] = 0;
+  w2[1] = 0;
+  w2[2] = 0;
+  w2[3] = 0;
+
+  u32 w3[4];
+
+  w3[0] = 0;
+  w3[1] = 0;
+  w3[2] = 0;
+  w3[3] = 0;
+
+  const u32 pw_len = pws[gid].pw_len & 63;
+
+  /**
+   * main: forward the block words, pw_len, every kernel argument and gid/lid/lsz to m33600m
+   */
+
+  m33600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
+}
+
+KERNEL_FQ void m33600_m16 (KERN_ATTR_BASIC ())
+{
+  /**
+   * base: load pws[gid].i[0..13] into w0..w3; w3[2] and w3[3] stay zero; pw_len is masked with 63
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+  const u64 lsz = get_local_size (0);
+
+  if (gid >= GID_CNT) return;
+
+  u32 w0[4];
+
+  w0[0] = pws[gid].i[ 0];
+  w0[1] = pws[gid].i[ 1];
+  w0[2] = pws[gid].i[ 2];
+  w0[3] = pws[gid].i[ 3];
+
+  u32 w1[4];
+
+  w1[0] = pws[gid].i[ 4];
+  w1[1] = pws[gid].i[ 5];
+  w1[2] = pws[gid].i[ 6];
+  w1[3] = pws[gid].i[ 7];
+
+  u32 w2[4];
+
+  w2[0] = pws[gid].i[ 8];
+  w2[1] = pws[gid].i[ 9];
+  w2[2] = pws[gid].i[10];
+  w2[3] = pws[gid].i[11];
+
+  u32 w3[4];
+
+  w3[0] = pws[gid].i[12];
+  w3[1] = pws[gid].i[13];
+  w3[2] = 0;
+  w3[3] = 0;
+
+  const u32 pw_len = pws[gid].pw_len & 63;
+
+  /**
+   * main: forward the block words, pw_len, every kernel argument and gid/lid/lsz to m33600m
+   */
+
+  m33600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
+}
+
+KERNEL_FQ void m33600_s04 (KERN_ATTR_BASIC ())
+{
+  /**
+   * base: load pws[gid].i[0..3] into w0; w1, w2 and w3 stay zero; pw_len is masked with 63
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+  const u64 lsz = get_local_size (0);
+
+  if (gid >= GID_CNT) return;
+
+  u32 w0[4];
+
+  w0[0] = pws[gid].i[ 0];
+  w0[1] = pws[gid].i[ 1];
+  w0[2] = pws[gid].i[ 2];
+  w0[3] = pws[gid].i[ 3];
+
+  u32 w1[4];
+
+  w1[0] = 0;
+  w1[1] = 0;
+  w1[2] = 0;
+  w1[3] = 0;
+
+  u32 w2[4];
+
+  w2[0] = 0;
+  w2[1] = 0;
+  w2[2] = 0;
+  w2[3] = 0;
+
+  u32 w3[4];
+
+  w3[0] = 0;
+  w3[1] = 0;
+  w3[2] = 0;
+  w3[3] = 0;
+
+  const u32 pw_len = pws[gid].pw_len & 63;
+
+  /**
+   * main: forward the block words, pw_len, every kernel argument and gid/lid/lsz to m33600s
+   */
+
+  m33600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
+}
+
+KERNEL_FQ void m33600_s08 (KERN_ATTR_BASIC ())
+{
+  /**
+   * base: load pws[gid].i[0..7] into w0 and w1; w2 and w3 stay zero; pw_len is masked with 63
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+  const u64 lsz = get_local_size (0);
+
+  if (gid >= GID_CNT) return;
+
+  u32 w0[4];
+
+  w0[0] = pws[gid].i[ 0];
+  w0[1] = pws[gid].i[ 1];
+  w0[2] = pws[gid].i[ 2];
+  w0[3] = pws[gid].i[ 3];
+
+  u32 w1[4];
+
+  w1[0] = pws[gid].i[ 4];
+  w1[1] = pws[gid].i[ 5];
+  w1[2] = pws[gid].i[ 6];
+  w1[3] = pws[gid].i[ 7];
+
+  u32 w2[4];
+
+  w2[0] = 0;
+  w2[1] = 0;
+  w2[2] = 0;
+  w2[3] = 0;
+
+  u32 w3[4];
+
+  w3[0] = 0;
+  w3[1] = 0;
+  w3[2] = 0;
+  w3[3] = 0;
+
+  const u32 pw_len = pws[gid].pw_len & 63;
+
+  /**
+   * main: forward the block words, pw_len, every kernel argument and gid/lid/lsz to m33600s
+   */
+
+  m33600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
+}
+
+KERNEL_FQ void m33600_s16 (KERN_ATTR_BASIC ())
+{
+  /**
+   * base: load pws[gid].i[0..13] into w0..w3; w3[2] and w3[3] stay zero; pw_len is masked with 63
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+  const u64 lsz = get_local_size (0);
+
+  if (gid >= GID_CNT) return;
+
+  u32 w0[4];
+
+  w0[0] = pws[gid].i[ 0];
+  w0[1] = pws[gid].i[ 1];
+  w0[2] = pws[gid].i[ 2];
+  w0[3] = pws[gid].i[ 3];
+
+  u32 w1[4];
+
+  w1[0] = pws[gid].i[ 4];
+  w1[1] = pws[gid].i[ 5];
+  w1[2] = pws[gid].i[ 6];
+  w1[3] = pws[gid].i[ 7];
+
+  u32 w2[4];
+
+  w2[0] = pws[gid].i[ 8];
+  w2[1] = pws[gid].i[ 9];
+  w2[2] = pws[gid].i[10];
+  w2[3] = pws[gid].i[11];
+
+  u32 w3[4];
+
+  w3[0] = pws[gid].i[12];
+  w3[1] = pws[gid].i[13];
+  w3[2] = 0;
+  w3[3] = 0;
+
+  const u32 pw_len = pws[gid].pw_len & 63;
+
+  /**
+   * main: forward the block words, pw_len, every kernel argument and gid/lid/lsz to m33600s
+   */
+
+  m33600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
+}
diff --git a/OpenCL/m33600_a3-pure.cl b/OpenCL/m33600_a3-pure.cl
new file mode 100644
index 000000000..ae113ddcd
--- /dev/null
+++ b/OpenCL/m33600_a3-pure.cl
@@ -0,0 +1,138 @@
+
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include M2S(INCLUDE_PATH/inc_vendor.h)
+#include M2S(INCLUDE_PATH/inc_types.h)
+#include M2S(INCLUDE_PATH/inc_platform.cl)
+#include M2S(INCLUDE_PATH/inc_common.cl)
+#include M2S(INCLUDE_PATH/inc_simd.cl)
+#include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
+#endif
+
+KERNEL_FQ void m33600_mxx (KERN_ATTR_VECTOR ())
+{
+  /**
+   * modifier: work-item ids; work-items with gid >= GID_CNT return without hashing
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * base: copy pws[gid].i into w[], one u32 per started 4 bytes of pw_len; the rest of w[] stays zero
+   */
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  u32x w[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
+  {
+    w[idx] = pws[gid].i[idx];
+  }
+
+  /**
+   * loop: only w[0] changes per step (w0l OR'ed with words_buf_r[il_pos / VECT_SIZE]); all of w is hashed from a fresh context
+   */
+
+  u32x w0l = w[0];
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+  {
+    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+
+    const u32x w0 = w0l | w0r;
+
+    w[0] = w0;
+
+    ripemd320_ctx_vector_t ctx;
+
+    ripemd320_init_vector (&ctx);
+
+    ripemd320_update_vector (&ctx, w, pw_len);
+
+    ripemd320_final_vector (&ctx);
+
+    const u32x r0 = ctx.h[DGST_R0];
+    const u32x r1 = ctx.h[DGST_R1];
+    const u32x r2 = ctx.h[DGST_R2];
+    const u32x r3 = ctx.h[DGST_R3];
+
+    COMPARE_M_SIMD (r0, r1, r2, r3);
+  }
+}
+
+KERNEL_FQ void m33600_sxx (KERN_ATTR_VECTOR ())
+{
+  /**
+   * modifier: work-item ids; work-items with gid >= GID_CNT return without hashing
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * digest: DGST_R0..DGST_R3 words of the digest at DIGESTS_OFFSET_HOST (presumably read by COMPARE_S_SIMD - confirm)
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
+  };
+
+  /**
+   * base: copy pws[gid].i into w[], one u32 per started 4 bytes of pw_len; the rest of w[] stays zero
+   */
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  u32x w[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
+  {
+    w[idx] = pws[gid].i[idx];
+  }
+
+  /**
+   * loop: only w[0] changes per step (w0l OR'ed with words_buf_r[il_pos / VECT_SIZE]); all of w is hashed from a fresh context
+   */
+
+  u32x w0l = w[0];
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+  {
+    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+
+    const u32x w0 = w0l | w0r;
+
+    w[0] = w0;
+
+    ripemd320_ctx_vector_t ctx;
+
+    ripemd320_init_vector (&ctx);
+
+    ripemd320_update_vector (&ctx, w, pw_len);
+
+    ripemd320_final_vector (&ctx);
+
+    const u32x r0 = ctx.h[DGST_R0];
+    const u32x r1 = ctx.h[DGST_R1];
+    const u32x r2 = ctx.h[DGST_R2];
+    const u32x r3 = ctx.h[DGST_R3];
+
+    COMPARE_S_SIMD (r0, r1, r2, r3);
+  }
+}
diff --git a/OpenCL/m33650_a0-pure.cl b/OpenCL/m33650_a0-pure.cl
new file mode 100644
index 000000000..071cffbc0
--- /dev/null
+++ b/OpenCL/m33650_a0-pure.cl
@@ -0,0 +1,135 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+//#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include M2S(INCLUDE_PATH/inc_vendor.h)
+#include M2S(INCLUDE_PATH/inc_types.h)
+#include M2S(INCLUDE_PATH/inc_platform.cl)
+#include M2S(INCLUDE_PATH/inc_common.cl)
+#include M2S(INCLUDE_PATH/inc_rp.h)
+#include M2S(INCLUDE_PATH/inc_rp.cl)
+#include M2S(INCLUDE_PATH/inc_scalar.cl)
+#include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
+#endif
+
+KERNEL_FQ void m33650_mxx (KERN_ATTR_RULES ())
+{
+  /**
+   * modifier: work-item ids; work-items with gid >= GID_CNT return without hashing
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * base: COPY_PW on pws[gid], then copy the salt at SALT_POS_HOST into s[] (one u32 per started 4 bytes of salt_len)
+   */
+
+  COPY_PW (pws[gid]);
+
+  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
+
+  u32 s[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
+  {
+    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
+  }
+
+  /**
+   * loop: per rule, PASTE_PW into tmp, apply the rule, hmac_init with the result, hmac_update with the salt, compare opad.h
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  {
+    pw_t tmp = PASTE_PW;
+
+    tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
+
+    ripemd320_hmac_ctx_t ctx;
+
+    ripemd320_hmac_init (&ctx, tmp.i, tmp.pw_len);
+
+    ripemd320_hmac_update (&ctx, s, salt_len);
+
+    ripemd320_hmac_final (&ctx);
+
+    const u32 r0 = ctx.opad.h[DGST_R0];
+    const u32 r1 = ctx.opad.h[DGST_R1];
+    const u32 r2 = ctx.opad.h[DGST_R2];
+    const u32 r3 = ctx.opad.h[DGST_R3];
+
+    COMPARE_M_SCALAR (r0, r1, r2, r3);
+  }
+}
+
+KERNEL_FQ void m33650_sxx (KERN_ATTR_RULES ())
+{
+  /**
+   * modifier: work-item ids; work-items with gid >= GID_CNT return without hashing
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * digest: DGST_R0..DGST_R3 words of the digest at DIGESTS_OFFSET_HOST (presumably read by COMPARE_S_SCALAR - confirm)
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
+  };
+
+  /**
+   * base: COPY_PW on pws[gid], then copy the salt at SALT_POS_HOST into s[] (one u32 per started 4 bytes of salt_len)
+   */
+
+  COPY_PW (pws[gid]);
+
+  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
+
+  u32 s[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
+  {
+    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
+  }
+
+  /**
+   * loop: per rule, PASTE_PW into tmp, apply the rule, hmac_init with the result, hmac_update with the salt, compare opad.h
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  {
+    pw_t tmp = PASTE_PW;
+
+    tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
+
+    ripemd320_hmac_ctx_t ctx;
+
+    ripemd320_hmac_init (&ctx, tmp.i, tmp.pw_len);
+
+    ripemd320_hmac_update (&ctx, s, salt_len);
+
+    ripemd320_hmac_final (&ctx);
+
+    const u32 r0 = ctx.opad.h[DGST_R0];
+    const u32 r1 = ctx.opad.h[DGST_R1];
+    const u32 r2 = ctx.opad.h[DGST_R2];
+    const u32 r3 = ctx.opad.h[DGST_R3];
+
+    COMPARE_S_SCALAR (r0, r1, r2, r3);
+  }
+}
diff --git a/OpenCL/m33650_a1-pure.cl b/OpenCL/m33650_a1-pure.cl
new file mode 100644
index 000000000..3c248fc8d
--- /dev/null
+++ b/OpenCL/m33650_a1-pure.cl
@@ -0,0 +1,183 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+//#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include M2S(INCLUDE_PATH/inc_vendor.h)
+#include M2S(INCLUDE_PATH/inc_types.h)
+#include M2S(INCLUDE_PATH/inc_platform.cl)
+#include M2S(INCLUDE_PATH/inc_common.cl)
+#include M2S(INCLUDE_PATH/inc_scalar.cl)
+#include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
+#endif
+
+KERNEL_FQ void m33650_mxx (KERN_ATTR_BASIC ())
+{
+  /**
+   * modifier: work-item ids; work-items with gid >= GID_CNT return without hashing
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * base: copy pws[gid].i into w[] and the salt at SALT_POS_HOST into s[]; unused words of both stay zero
+   */
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  u32 w[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
+  {
+    w[idx] = pws[gid].i[idx];
+  }
+
+  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
+
+  u32 s[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
+  {
+    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
+  }
+
+  /**
+   * loop: per combs_buf entry, build c[] (entry moved by pw_len - presumably a byte offset, confirm - then OR'ed with w[]), HMAC it with the salt
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  {
+    const u32 comb_len = combs_buf[il_pos].pw_len;
+
+    u32 c[64];
+
+    #ifdef _unroll
+    #pragma unroll
+    #endif
+    for (int idx = 0; idx < 64; idx++)
+    {
+      c[idx] = combs_buf[il_pos].i[idx];
+    }
+
+    switch_buffer_by_offset_1x64_le_S (c, pw_len);
+
+    #ifdef _unroll
+    #pragma unroll
+    #endif
+    for (int i = 0; i < 64; i++)
+    {
+      c[i] |= w[i];
+    }
+
+    ripemd320_hmac_ctx_t ctx;
+
+    ripemd320_hmac_init (&ctx, c, pw_len + comb_len);
+
+    ripemd320_hmac_update (&ctx, s, salt_len);
+
+    ripemd320_hmac_final (&ctx);
+
+    const u32 r0 = ctx.opad.h[DGST_R0];
+    const u32 r1 = ctx.opad.h[DGST_R1];
+    const u32 r2 = ctx.opad.h[DGST_R2];
+    const u32 r3 = ctx.opad.h[DGST_R3];
+
+    COMPARE_M_SCALAR (r0, r1, r2, r3);
+  }
+}
+
+KERNEL_FQ void m33650_sxx (KERN_ATTR_BASIC ())
+{
+  /**
+   * modifier: work-item ids; work-items with gid >= GID_CNT return without hashing
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * digest: DGST_R0..DGST_R3 words of the digest at DIGESTS_OFFSET_HOST (presumably read by COMPARE_S_SCALAR - confirm)
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
+  };
+
+  /**
+   * base: copy pws[gid].i into w[] and the salt at SALT_POS_HOST into s[]; unused words of both stay zero
+   */
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  u32 w[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
+  {
+    w[idx] = pws[gid].i[idx];
+  }
+
+  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
+
+  u32 s[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
+  {
+    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
+  }
+
+  /**
+   * loop: per combs_buf entry, build c[] (entry moved by pw_len - presumably a byte offset, confirm - then OR'ed with w[]), HMAC it with the salt
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  {
+    const u32 comb_len = combs_buf[il_pos].pw_len;
+
+    u32 c[64];
+
+    #ifdef _unroll
+    #pragma unroll
+    #endif
+    for (int idx = 0; idx < 64; idx++)
+    {
+      c[idx] = combs_buf[il_pos].i[idx];
+    }
+
+    switch_buffer_by_offset_1x64_le_S (c, pw_len);
+
+    #ifdef _unroll
+    #pragma unroll
+    #endif
+    for (int i = 0; i < 64; i++)
+    {
+      c[i] |= w[i];
+    }
+
+    ripemd320_hmac_ctx_t ctx;
+
+    ripemd320_hmac_init (&ctx, c, pw_len + comb_len);
+
+    ripemd320_hmac_update (&ctx, s, salt_len);
+
+    ripemd320_hmac_final (&ctx);
+
+    const u32 r0 = ctx.opad.h[DGST_R0];
+    const u32 r1 = ctx.opad.h[DGST_R1];
+    const u32 r2 = ctx.opad.h[DGST_R2];
+    const u32 r3 = ctx.opad.h[DGST_R3];
+
+    COMPARE_S_SCALAR (r0, r1, r2, r3);
+  }
+}
diff --git a/OpenCL/m33650_a3-pure.cl b/OpenCL/m33650_a3-pure.cl
new file mode 100644
index 000000000..4511a6438
--- /dev/null
+++ b/OpenCL/m33650_a3-pure.cl
@@ -0,0 +1,155 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include M2S(INCLUDE_PATH/inc_vendor.h)
+#include M2S(INCLUDE_PATH/inc_types.h)
+#include M2S(INCLUDE_PATH/inc_platform.cl)
+#include M2S(INCLUDE_PATH/inc_common.cl)
+#include M2S(INCLUDE_PATH/inc_simd.cl)
+#include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
+#endif
+
+KERNEL_FQ void m33650_mxx (KERN_ATTR_VECTOR ())
+{
+  /**
+   * modifier: work-item ids; work-items with gid >= GID_CNT return without hashing
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * base: copy pws[gid].i into w[] and the salt at SALT_POS_HOST into s[]; unused words of both stay zero
+   */
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  u32x w[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
+  {
+    w[idx] = pws[gid].i[idx];
+  }
+
+  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
+
+  u32x s[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
+  {
+    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
+  }
+
+  /**
+   * loop: only w[0] changes per step (w0l OR'ed with words_buf_r[il_pos / VECT_SIZE]); hmac_init with w, hmac_update with s
+   */
+
+  u32x w0l = w[0];
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+  {
+    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+
+    const u32x w0 = w0l | w0r;
+
+    w[0] = w0;
+
+    ripemd320_hmac_ctx_vector_t ctx;
+
+    ripemd320_hmac_init_vector (&ctx, w, pw_len);
+
+    ripemd320_hmac_update_vector (&ctx, s, salt_len);
+
+    ripemd320_hmac_final_vector (&ctx);
+
+    const u32x r0 = ctx.opad.h[DGST_R0];
+    const u32x r1 = ctx.opad.h[DGST_R1];
+    const u32x r2 = ctx.opad.h[DGST_R2];
+    const u32x r3 = ctx.opad.h[DGST_R3];
+
+    COMPARE_M_SIMD (r0, r1, r2, r3);
+  }
+}
+
+KERNEL_FQ void m33650_sxx (KERN_ATTR_VECTOR ())
+{
+  /**
+   * modifier: work-item ids; work-items with gid >= GID_CNT return without hashing
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * digest: DGST_R0..DGST_R3 words of the digest at DIGESTS_OFFSET_HOST (presumably read by COMPARE_S_SIMD - confirm)
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
+  };
+
+  /**
+   * base: copy pws[gid].i into w[] and the salt at SALT_POS_HOST into s[]; unused words of both stay zero
+   */
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  u32x w[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
+  {
+    w[idx] = pws[gid].i[idx];
+  }
+
+  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
+
+  u32x s[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
+  {
+    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
+  }
+
+  /**
+   * loop: only w[0] changes per step (w0l OR'ed with words_buf_r[il_pos / VECT_SIZE]); hmac_init with w, hmac_update with s
+   */
+
+  u32x w0l = w[0];
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+  {
+    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+
+    const u32x w0 = w0l | w0r;
+
+    w[0] = w0;
+
+    ripemd320_hmac_ctx_vector_t ctx;
+
+    ripemd320_hmac_init_vector (&ctx, w, pw_len);
+
+    ripemd320_hmac_update_vector (&ctx, s, salt_len);
+
+    ripemd320_hmac_final_vector (&ctx);
+
+    const u32x r0 = ctx.opad.h[DGST_R0];
+    const u32x r1 = ctx.opad.h[DGST_R1];
+    const u32x r2 = ctx.opad.h[DGST_R2];
+    const u32x r3 = ctx.opad.h[DGST_R3];
+
+    COMPARE_S_SIMD (r0, r1, r2, r3);
+  }
+}
diff --git a/OpenCL/m33660_a0-pure.cl b/OpenCL/m33660_a0-pure.cl
new file mode 100644
index 000000000..fd7b04dea
--- /dev/null
+++ b/OpenCL/m33660_a0-pure.cl
@@ -0,0 +1,139 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+//#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include M2S(INCLUDE_PATH/inc_vendor.h)
+#include M2S(INCLUDE_PATH/inc_types.h)
+#include M2S(INCLUDE_PATH/inc_platform.cl)
+#include M2S(INCLUDE_PATH/inc_common.cl)
+#include M2S(INCLUDE_PATH/inc_rp.h)
+#include M2S(INCLUDE_PATH/inc_rp.cl)
+#include M2S(INCLUDE_PATH/inc_scalar.cl)
+#include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
+#endif
+
+KERNEL_FQ void m33660_mxx (KERN_ATTR_RULES ())
+{
+  /**
+   * modifier: work-item ids; work-items with gid >= GID_CNT return without hashing
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * base: COPY_PW on pws[gid], copy the salt into s[], and run hmac_init with the salt once into ctx0 before the loop
+   */
+
+  COPY_PW (pws[gid]);
+
+  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
+
+  u32 s[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
+  {
+    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
+  }
+
+  ripemd320_hmac_ctx_t ctx0;
+
+  ripemd320_hmac_init (&ctx0, s, salt_len);
+
+  /**
+   * loop: per rule, PASTE_PW into tmp, apply the rule, hmac_update a copy of ctx0 with the result, finalize, compare opad.h
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  {
+    pw_t tmp = PASTE_PW;
+
+    tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
+
+    ripemd320_hmac_ctx_t ctx = ctx0;
+
+    ripemd320_hmac_update (&ctx, tmp.i, tmp.pw_len);
+
+    ripemd320_hmac_final (&ctx);
+
+    const u32 r0 = ctx.opad.h[DGST_R0];
+    const u32 r1 = ctx.opad.h[DGST_R1];
+    const u32 r2 = ctx.opad.h[DGST_R2];
+    const u32 r3 = ctx.opad.h[DGST_R3];
+
+    COMPARE_M_SCALAR (r0, r1, r2, r3);
+  }
+}
+
+KERNEL_FQ void m33660_sxx (KERN_ATTR_RULES ())
+{
+  /**
+   * modifier: work-item ids; work-items with gid >= GID_CNT return without hashing
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * digest: DGST_R0..DGST_R3 words of the digest at DIGESTS_OFFSET_HOST (presumably read by COMPARE_S_SCALAR - confirm)
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
+  };
+
+  /**
+   * base: COPY_PW on pws[gid], copy the salt into s[], and run hmac_init with the salt once into ctx0 before the loop
+   */
+
+  COPY_PW (pws[gid]);
+
+  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
+
+  u32 s[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
+  {
+    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
+  }
+
+  ripemd320_hmac_ctx_t ctx0;
+
+  ripemd320_hmac_init (&ctx0, s, salt_len);
+
+  /**
+   * loop: per rule, PASTE_PW into tmp, apply the rule, hmac_update a copy of ctx0 with the result, finalize, compare opad.h
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  {
+    pw_t tmp = PASTE_PW;
+
+    tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
+
+    ripemd320_hmac_ctx_t ctx = ctx0;
+
+    ripemd320_hmac_update (&ctx, tmp.i, tmp.pw_len);
+
+    ripemd320_hmac_final (&ctx);
+
+    const u32 r0 = ctx.opad.h[DGST_R0];
+    const u32 r1 = ctx.opad.h[DGST_R1];
+    const u32 r2 = ctx.opad.h[DGST_R2];
+    const u32 r3 = ctx.opad.h[DGST_R3];
+
+    COMPARE_S_SCALAR (r0, r1, r2, r3);
+  }
+}
diff --git a/OpenCL/m33660_a1-pure.cl b/OpenCL/m33660_a1-pure.cl
new file mode 100644
index 000000000..9d070fa83
--- /dev/null
+++ b/OpenCL/m33660_a1-pure.cl
@@ -0,0 +1,187 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+//#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include M2S(INCLUDE_PATH/inc_vendor.h)
+#include M2S(INCLUDE_PATH/inc_types.h)
+#include M2S(INCLUDE_PATH/inc_platform.cl)
+#include M2S(INCLUDE_PATH/inc_common.cl)
+#include M2S(INCLUDE_PATH/inc_scalar.cl)
+#include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
+#endif
+
+KERNEL_FQ void m33660_mxx (KERN_ATTR_BASIC ())
+{
+  /**
+   * modifier: work-item ids; work-items with gid >= GID_CNT return without hashing
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * base: copy pws[gid].i into w[] and the salt into s[], then run hmac_init with the salt once into ctx0 before the loop
+   */
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  u32 w[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
+  {
+    w[idx] = pws[gid].i[idx];
+  }
+
+  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
+
+  u32 s[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
+  {
+    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
+  }
+
+  ripemd320_hmac_ctx_t ctx0;
+
+  ripemd320_hmac_init (&ctx0, s, salt_len);
+
+  /**
+   * loop: per combs_buf entry, build c[] (entry moved by pw_len - presumably a byte offset, confirm - then OR'ed with w[]), feed it to a copy of ctx0
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  {
+    const u32 comb_len = combs_buf[il_pos].pw_len;
+
+    u32 c[64];
+
+    #ifdef _unroll
+    #pragma unroll
+    #endif
+    for (int idx = 0; idx < 64; idx++)
+    {
+      c[idx] = combs_buf[il_pos].i[idx];
+    }
+
+    switch_buffer_by_offset_1x64_le_S (c, pw_len);
+
+    #ifdef _unroll
+    #pragma unroll
+    #endif
+    for (int i = 0; i < 64; i++)
+    {
+      c[i] |= w[i];
+    }
+
+    ripemd320_hmac_ctx_t ctx = ctx0;
+
+    ripemd320_hmac_update (&ctx, c, pw_len + comb_len);
+
+    ripemd320_hmac_final (&ctx);
+
+    const u32 r0 = ctx.opad.h[DGST_R0];
+    const u32 r1 = ctx.opad.h[DGST_R1];
+    const u32 r2 = ctx.opad.h[DGST_R2];
+    const u32 r3 = ctx.opad.h[DGST_R3];
+
+    COMPARE_M_SCALAR (r0, r1, r2, r3);
+  }
+}
+
+KERNEL_FQ void m33660_sxx (KERN_ATTR_BASIC ())
+{
+  /**
+   * modifier: work-item ids; work-items with gid >= GID_CNT return without hashing
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * digest: DGST_R0..DGST_R3 words of the digest at DIGESTS_OFFSET_HOST (presumably read by COMPARE_S_SCALAR - confirm)
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
+  };
+
+  /**
+   * base: copy pws[gid].i into w[] and the salt into s[], then run hmac_init with the salt once into ctx0 before the loop
+   */
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  u32 w[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
+  {
+    w[idx] = pws[gid].i[idx];
+  }
+
+  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
+
+  u32 s[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
+  {
+    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
+  }
+
+  ripemd320_hmac_ctx_t ctx0;
+
+  ripemd320_hmac_init (&ctx0, s, salt_len);
+
+  /**
+   * loop: per combs_buf entry, build c[] (entry moved by pw_len - presumably a byte offset, confirm - then OR'ed with w[]), feed it to a copy of ctx0
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  {
+    const u32 comb_len = combs_buf[il_pos].pw_len;
+
+    u32 c[64];
+
+    #ifdef _unroll
+    #pragma unroll
+    #endif
+    for (int idx = 0; idx < 64; idx++)
+    {
+      c[idx] = combs_buf[il_pos].i[idx];
+    }
+
+    switch_buffer_by_offset_1x64_le_S (c, pw_len);
+
+    #ifdef _unroll
+    #pragma unroll
+    #endif
+    for (int i = 0; i < 64; i++)
+    {
+      c[i] |= w[i];
+    }
+
+    ripemd320_hmac_ctx_t ctx = ctx0;
+
+    ripemd320_hmac_update (&ctx, c, pw_len + comb_len);
+
+    ripemd320_hmac_final (&ctx);
+
+    const u32 r0 = ctx.opad.h[DGST_R0];
+    const u32 r1 = ctx.opad.h[DGST_R1];
+    const u32 r2 = ctx.opad.h[DGST_R2];
+    const u32 r3 = ctx.opad.h[DGST_R3];
+
+    COMPARE_S_SCALAR (r0, r1, r2, r3);
+  }
+}
diff --git a/OpenCL/m33660_a3-pure.cl b/OpenCL/m33660_a3-pure.cl
new file mode 100644
index 000000000..10e42f73d
--- /dev/null
+++ b/OpenCL/m33660_a3-pure.cl
@@ -0,0 +1,159 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include M2S(INCLUDE_PATH/inc_vendor.h)
+#include M2S(INCLUDE_PATH/inc_types.h)
+#include M2S(INCLUDE_PATH/inc_platform.cl)
+#include M2S(INCLUDE_PATH/inc_common.cl)
+#include M2S(INCLUDE_PATH/inc_simd.cl)
+#include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
+#endif
+
+KERNEL_FQ void m33660_mxx (KERN_ATTR_VECTOR ())
+{
+  /**
+   * modifier: work-item ids; work-items with gid >= GID_CNT return without hashing
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * base: copy pws[gid].i into w[] and the salt into s[], then run hmac_init_vector with the salt once into ctx0 before the loop
+   */
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  u32x w[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
+  {
+    w[idx] = pws[gid].i[idx];
+  }
+
+  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
+
+  u32x s[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
+  {
+    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
+  }
+
+  ripemd320_hmac_ctx_vector_t ctx0;
+
+  ripemd320_hmac_init_vector (&ctx0, s, salt_len);
+
+  /**
+   * loop: only w[0] changes per step (w0l OR'ed with words_buf_r[il_pos / VECT_SIZE]); w is fed to a copy of ctx0
+   */
+
+  u32x w0l = w[0];
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+  {
+    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+
+    const u32x w0 = w0l | w0r;
+
+    w[0] = w0;
+
+    ripemd320_hmac_ctx_vector_t ctx = ctx0;
+
+    ripemd320_hmac_update_vector (&ctx, w, pw_len);
+
+    ripemd320_hmac_final_vector (&ctx);
+
+    const u32x r0 = ctx.opad.h[DGST_R0];
+    const u32x r1 = ctx.opad.h[DGST_R1];
+    const u32x r2 = ctx.opad.h[DGST_R2];
+    const u32x r3 = ctx.opad.h[DGST_R3];
+
+    COMPARE_M_SIMD (r0, r1, r2, r3);
+  }
+}
+
+KERNEL_FQ void m33660_sxx (KERN_ATTR_VECTOR ())
+{
+  /**
+   * modifier: work-item ids; work-items with gid >= GID_CNT return without hashing
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * digest: DGST_R0..DGST_R3 words of the digest at DIGESTS_OFFSET_HOST (presumably read by COMPARE_S_SIMD - confirm)
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
+  };
+
+  /**
+   * base: copy pws[gid].i into w[] and the salt into s[], then run hmac_init_vector with the salt once into ctx0 before the loop
+   */
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  u32x w[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
+  {
+    w[idx] = pws[gid].i[idx];
+  }
+
+  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
+
+  u32x s[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
+  {
+    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
+  }
+
+  ripemd320_hmac_ctx_vector_t ctx0;
+
+  ripemd320_hmac_init_vector (&ctx0, s, salt_len);
+
+  /**
+   * loop: only w[0] changes per step (w0l OR'ed with words_buf_r[il_pos / VECT_SIZE]); w is fed to a copy of ctx0
+   */
+
+  u32x w0l = w[0];
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+  {
+    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+
+    const u32x w0 = w0l | w0r;
+
+    w[0] = w0;
+
+    ripemd320_hmac_ctx_vector_t ctx = ctx0;
+
+    ripemd320_hmac_update_vector (&ctx, w, pw_len);
+
+    ripemd320_hmac_final_vector (&ctx);
+
+    const u32x r0 = ctx.opad.h[DGST_R0];
+    const u32x r1 = ctx.opad.h[DGST_R1];
+    const u32x r2 = ctx.opad.h[DGST_R2];
+    const u32x r3 = ctx.opad.h[DGST_R3];
+
+    COMPARE_S_SIMD (r0, r1, r2, r3);
+  }
+}
diff --git a/docs/changes.txt b/docs/changes.txt
index 103019d6d..991d43615 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -27,6 +27,8 @@
 - Added hash-mode: GPG (AES-128/AES-256 (SHA-256($pass)))
 - Added hash-mode: GPG (AES-128/AES-256 (SHA-512($pass)))
 - Added hash-mode: GPG (CAST5 (SHA-1($pass)))
+- Added hash-mode: HMAC-RIPEMD320 (key = $pass)
+- Added hash-mode: HMAC-RIPEMD320 (key = $salt)
 - Added hash-mode: Kerberos 5, etype 17, AS-REP
 - Added hash-mode: Kerberos 5, etype 18, AS-REP
 - Added hash-mode: MetaMask Mobile Wallet
@@ -39,6 +41,7 @@
 - Added hash-mode: NetIQ SSPR (SHA-1 with Salt)
 - Added hash-mode: NetIQ SSPR (SHA-256 with Salt)
 - Added hash-mode: NetIQ SSPR (SHA-512 with Salt)
+- Added hash-mode: RIPEMD-320
 - Added hash-mode: RC4 104-bit DropN
 - Added hash-mode: RC4 40-bit DropN
 - Added hash-mode: RC4 72-bit DropN
diff --git a/docs/readme.txt b/docs/readme.txt
index 38d1bb594..2f0efe391 100644
--- a/docs/readme.txt
+++ b/docs/readme.txt
@@ -57,6 +57,7 @@ NVIDIA GPUs require "NVIDIA Driver" (440.64 or later) and "CUDA Toolkit" (9.0 or
 - SHA3-384
 - SHA3-512
 - RIPEMD-160
+- RIPEMD-320
 - BLAKE2b-512
 - BLAKE2s-256
 - SM3
@@ -143,6 +144,8 @@ NVIDIA GPUs require "NVIDIA Driver" (440.64 or later) and "CUDA Toolkit" (9.0 or
 - HMAC-MD5 (key = $salt)
 - HMAC-RIPEMD160 (key = $pass)
 - HMAC-RIPEMD160 (key = $salt)
+- HMAC-RIPEMD320 (key = $pass)
+- HMAC-RIPEMD320 (key = $salt)
 - HMAC-SHA1 (key = $pass)
 - HMAC-SHA1 (key = $salt)
 - HMAC-SHA256 (key = $pass)
diff --git a/include/types.h b/include/types.h
index 880af23ee..919e7758a 100644
--- a/include/types.h
+++ b/include/types.h
@@ -481,6 +481,7 @@ typedef enum dgst_size
   DGST_SIZE_4_6  = (6  * sizeof (u32)), // 24
   DGST_SIZE_4_7  = (7  * sizeof (u32)), // 28
   DGST_SIZE_4_8  = (8  * sizeof (u32)), // 32
+  DGST_SIZE_4_10 = (10 * sizeof (u32)), // 40
   DGST_SIZE_4_16 = (16 * sizeof (u32)), // 64 !!!
   DGST_SIZE_4_32 = (32 * sizeof (u32)), // 128 !!!
   DGST_SIZE_4_64 = (64 * sizeof (u32)), // 256
diff --git a/src/modules/module_33600.c b/src/modules/module_33600.c
new file mode 100644
index 000000000..745b95b8f
--- /dev/null
+++ b/src/modules/module_33600.c
@@ -0,0 +1,196 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#include "common.h"
+#include "types.h"
+#include "modules.h"
+#include "bitops.h"
+#include "convert.h"
+#include "shared.h"
+
+static const u32   ATTACK_EXEC    = ATTACK_EXEC_INSIDE_KERNEL;
+static const u32   DGST_POS0      = 0;
+static const u32   DGST_POS1      = 1;
+static const u32   DGST_POS2      = 2;
+static const u32   DGST_POS3      = 3;
+static const u32   DGST_SIZE      = DGST_SIZE_4_10; // 10 x u32 = 40 bytes: the full 320-bit digest
+static const u32   HASH_CATEGORY  = HASH_CATEGORY_RAW_HASH;
+static const char *HASH_NAME      = "RIPEMD-320";
+static const u64   KERN_TYPE      = 33600;
+static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE;
+static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
+                                  | OPTS_TYPE_PT_GENERATE_LE
+                                  | OPTS_TYPE_PT_ADD80;
+static const u32   SALT_TYPE      = SALT_TYPE_NONE; // unsalted: module_hash_decode expects the digest as the only token
+static const char *ST_PASS        = "hashcat"; // NOTE(review): presumably the plaintext belonging to ST_HASH (self-test pair) - confirm
+static const char *ST_HASH        = "8339009b816d4e4c2a6be3c6e1daac6aca69a7670ecdc583adfca0db17cc8f08ce35d6c759b038ab"; // 80 hex characters, the fixed length module_hash_decode accepts
+
+u32         module_attack_exec    (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC;     }
+u32         module_dgst_pos0      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0;       }
+u32         module_dgst_pos1      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS1;       }
+u32         module_dgst_pos2      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS2;       }
+u32         module_dgst_pos3      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS3;       }
+u32         module_dgst_size      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_SIZE;       }
+u32         module_hash_category  (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_CATEGORY;   }
+const char *module_hash_name      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_NAME;       }
+u64         module_kern_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return KERN_TYPE;       }
+u32         module_opti_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTI_TYPE;       }
+u64         module_opts_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTS_TYPE;       }
+u32         module_salt_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return SALT_TYPE;       }
+const char *module_st_hash        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH;         }
+const char *module_st_pass        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS;         }
+
+int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
+{
+  // Accepted input: one token of fixed length 80 that must verify as hex,
+  // i.e. 10 digest words of 8 hex characters each.
+
+  hc_token_t token;
+
+  memset (&token, 0, sizeof (token));
+
+  token.token_cnt = 1;
+  token.len[0]    = 80;
+  token.attr[0]   = TOKEN_ATTR_FIXED_LENGTH | TOKEN_ATTR_VERIFY_HEX;
+
+  const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token);
+
+  if (rc_tokenizer != PARSER_OK)
+  {
+    return rc_tokenizer;
+  }
+
+  // each group of 8 hex characters becomes one u32 of the digest, in input order
+
+  const u8 *hex_pos = token.buf[0];
+
+  u32 *digest_words = (u32 *) digest_buf;
+
+  for (int word = 0; word < 10; word++)
+  {
+    digest_words[word] = hex_to_u32 (hex_pos + (word * 8));
+  }
+
+  return PARSER_OK;
+}
+
+int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const void *digest_buf, MAYBE_UNUSED const salt_t *salt, MAYBE_UNUSED const void *esalt_buf, MAYBE_UNUSED const void *hook_salt_buf, MAYBE_UNUSED const hashinfo_t *hash_info, char *line_buf, MAYBE_UNUSED const int line_size)
+{
+  /**
+   * Writes the textual form of the digest into line_buf: the 10 digest
+   * words in order, each converted by u32_to_hex and placed 8 characters
+   * after the previous one.
+   *
+   * Returns the number of characters written, which is always 80.
+   * This function itself appends nothing after the last word.
+   */
+
+  const u32 *digest = (const u32 *) digest_buf;
+
+  // convert from a private copy: the original buffer must stay untouched,
+  // otherwise the sorting that relies on it would be destroyed
+
+  u32 tmp[10];
+
+  for (int i = 0; i < 10; i++)
+  {
+    tmp[i] = digest[i];
+  }
+
+  u8 *out_buf = (u8 *) line_buf;
+
+  int out_len = 0;
+
+  for (int i = 0; i < 10; i++)
+  {
+    u8 *word_out = out_buf + out_len;
+
+    u32_to_hex (tmp[i], word_out);
+
+    out_len += 8;
+  }
+
+  return out_len;
+}
+
+void module_init (module_ctx_t *module_ctx)
+{
+  module_ctx->module_context_size             = MODULE_CONTEXT_SIZE_CURRENT;
+  module_ctx->module_interface_version        = MODULE_INTERFACE_VERSION_CURRENT;
+  // hook table: non-default entries point at functions defined in this file; MODULE_DEFAULT presumably selects the core's default behaviour (confirm)
+  module_ctx->module_attack_exec              = module_attack_exec;
+  module_ctx->module_benchmark_esalt          = MODULE_DEFAULT;
+  module_ctx->module_benchmark_hook_salt      = MODULE_DEFAULT;
+  module_ctx->module_benchmark_mask           = MODULE_DEFAULT;
+  module_ctx->module_benchmark_charset        = MODULE_DEFAULT;
+  module_ctx->module_benchmark_salt           = MODULE_DEFAULT;
+  module_ctx->module_build_plain_postprocess  = MODULE_DEFAULT;
+  module_ctx->module_deep_comp_kernel         = MODULE_DEFAULT;
+  module_ctx->module_deprecated_notice        = MODULE_DEFAULT;
+  module_ctx->module_dgst_pos0                = module_dgst_pos0;
+  module_ctx->module_dgst_pos1                = module_dgst_pos1;
+  module_ctx->module_dgst_pos2                = module_dgst_pos2;
+  module_ctx->module_dgst_pos3                = module_dgst_pos3;
+  module_ctx->module_dgst_size                = module_dgst_size;
+  module_ctx->module_dictstat_disable         = MODULE_DEFAULT;
+  module_ctx->module_esalt_size               = MODULE_DEFAULT;
+  module_ctx->module_extra_buffer_size        = MODULE_DEFAULT;
+  module_ctx->module_extra_tmp_size           = MODULE_DEFAULT;
+  module_ctx->module_extra_tuningdb_block     = MODULE_DEFAULT;
+  module_ctx->module_forced_outfile_format    = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_count        = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_parse        = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_save         = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_postprocess  = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_potfile      = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_zero_hash    = MODULE_DEFAULT;
+  module_ctx->module_hash_decode              = module_hash_decode;
+  module_ctx->module_hash_encode_status       = MODULE_DEFAULT;
+  module_ctx->module_hash_encode_potfile      = MODULE_DEFAULT;
+  module_ctx->module_hash_encode              = module_hash_encode;
+  module_ctx->module_hash_init_selftest       = MODULE_DEFAULT;
+  module_ctx->module_hash_mode                = MODULE_DEFAULT;
+  module_ctx->module_hash_category            = module_hash_category;
+  module_ctx->module_hash_name                = module_hash_name;
+  module_ctx->module_hashes_count_min         = MODULE_DEFAULT;
+  module_ctx->module_hashes_count_max         = MODULE_DEFAULT;
+  module_ctx->module_hlfmt_disable            = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_size    = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_init    = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_term    = MODULE_DEFAULT;
+  module_ctx->module_hook12                   = MODULE_DEFAULT;
+  module_ctx->module_hook23                   = MODULE_DEFAULT;
+  module_ctx->module_hook_salt_size           = MODULE_DEFAULT;
+  module_ctx->module_hook_size                = MODULE_DEFAULT;
+  module_ctx->module_jit_build_options        = MODULE_DEFAULT;
+  module_ctx->module_jit_cache_disable        = MODULE_DEFAULT;
+  module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
+  module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
+  module_ctx->module_kern_type                = module_kern_type;
+  module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
+  module_ctx->module_opti_type                = module_opti_type;
+  module_ctx->module_opts_type                = module_opts_type;
+  module_ctx->module_outfile_check_disable    = MODULE_DEFAULT;
+  module_ctx->module_outfile_check_nocomp     = MODULE_DEFAULT;
+  module_ctx->module_potfile_custom_check     = MODULE_DEFAULT;
+  module_ctx->module_potfile_disable          = MODULE_DEFAULT;
+  module_ctx->module_potfile_keep_all_hashes  = MODULE_DEFAULT;
+  module_ctx->module_pwdump_column            = MODULE_DEFAULT;
+  module_ctx->module_pw_max                   = MODULE_DEFAULT;
+  module_ctx->module_pw_min                   = MODULE_DEFAULT;
+  module_ctx->module_salt_max                 = MODULE_DEFAULT;
+  module_ctx->module_salt_min                 = MODULE_DEFAULT;
+  module_ctx->module_salt_type                = module_salt_type;
+  module_ctx->module_separator                = MODULE_DEFAULT;
+  module_ctx->module_st_hash                  = module_st_hash;
+  module_ctx->module_st_pass                  = module_st_pass;
+  module_ctx->module_tmp_size                 = MODULE_DEFAULT;
+  module_ctx->module_unstable_warning         = MODULE_DEFAULT;
+  module_ctx->module_warmup_disable           = MODULE_DEFAULT;
+}
diff --git a/src/modules/module_33650.c b/src/modules/module_33650.c
new file mode 100644
index 000000000..a3b2d2d07
--- /dev/null
+++ b/src/modules/module_33650.c
@@ -0,0 +1,223 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#include "common.h"
+#include "types.h"
+#include "modules.h"
+#include "bitops.h"
+#include "convert.h"
+#include "shared.h"
+
+static const u32   ATTACK_EXEC    = ATTACK_EXEC_INSIDE_KERNEL;
+static const u32   DGST_POS0      = 0;
+static const u32   DGST_POS1      = 1;
+static const u32   DGST_POS2      = 2;
+static const u32   DGST_POS3      = 3;
+static const u32   DGST_SIZE      = DGST_SIZE_4_10; // 10 x u32 = 40 bytes: the full 320-bit digest
+static const u32   HASH_CATEGORY  = HASH_CATEGORY_RAW_HASH_AUTHENTICATED;
+static const char *HASH_NAME      = "HMAC-RIPEMD320 (key = $pass)";
+static const u64   KERN_TYPE      = 33650;
+static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_NOT_ITERATED;
+static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
+                                  | OPTS_TYPE_PT_GENERATE_LE
+                                  | OPTS_TYPE_ST_ADD80
+                                  | OPTS_TYPE_ST_ADDBITS14;
+static const u32   SALT_TYPE      = SALT_TYPE_GENERIC; // salted: module_hash_decode reads the salt as a second token after the separator
+static const char *ST_PASS        = "hashcat"; // NOTE(review): presumably the plaintext belonging to ST_HASH (self-test pair) - confirm
+static const char *ST_HASH        = "e740440e7bd65056a90f1aa4eb00e00308a9f1788866b4eacbd46cfc8032301d4e5b3a9d179be044:95454599772294521162217"; // digest (80 hex characters), ':' and the salt
+
+u32         module_attack_exec    (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC;     }
+u32         module_dgst_pos0      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0;       }
+u32         module_dgst_pos1      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS1;       }
+u32         module_dgst_pos2      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS2;       }
+u32         module_dgst_pos3      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS3;       }
+u32         module_dgst_size      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_SIZE;       }
+u32         module_hash_category  (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_CATEGORY;   }
+const char *module_hash_name      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_NAME;       }
+u64         module_kern_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return KERN_TYPE;       }
+u32         module_opti_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTI_TYPE;       }
+u64         module_opts_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTS_TYPE;       }
+u32         module_salt_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return SALT_TYPE;       }
+const char *module_st_hash        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH;         }
+const char *module_st_pass        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS;         }
+
+int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
+{
+  // Expected line: 80 hex characters of digest, then the separator,
+  // then the salt.
+
+  hc_token_t token;
+
+  memset (&token, 0, sizeof (token));
+
+  token.token_cnt = 2;
+
+  // token 0: the digest
+
+  token.sep[0]  = hashconfig->separator;
+  token.len[0]  = 80;
+  token.attr[0] = TOKEN_ATTR_FIXED_LENGTH | TOKEN_ATTR_VERIFY_HEX;
+
+  // token 1: the salt; with OPTS_TYPE_ST_HEX set, the accepted length range
+  // is doubled and the token must additionally verify as hex
+
+  const bool salt_is_hex = (hashconfig->opts_type & OPTS_TYPE_ST_HEX) != 0;
+
+  token.len_min[1] = SALT_MIN;
+  token.len_max[1] = SALT_MAX;
+  token.attr[1]    = TOKEN_ATTR_VERIFY_LENGTH;
+
+  if (salt_is_hex)
+  {
+    token.len_min[1] *= 2;
+    token.len_max[1] *= 2;
+    token.attr[1]    |= TOKEN_ATTR_VERIFY_HEX;
+  }
+
+  const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token);
+
+  if (rc_tokenizer != PARSER_OK)
+  {
+    return rc_tokenizer;
+  }
+
+  // each group of 8 hex characters becomes one u32 of the digest, in input order
+
+  u32 *digest_words = (u32 *) digest_buf;
+
+  for (int word = 0; word < 10; word++)
+  {
+    digest_words[word] = hex_to_u32 (token.buf[0] + (word * 8));
+  }
+
+  const bool salt_ok = generic_salt_decode (hashconfig, token.buf[1], token.len[1], (u8 *) salt->salt_buf, (int *) &salt->salt_len);
+
+  return (salt_ok == false) ? PARSER_SALT_LENGTH : PARSER_OK;
+}
+
+int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const void *digest_buf, MAYBE_UNUSED const salt_t *salt, MAYBE_UNUSED const void *esalt_buf, MAYBE_UNUSED const void *hook_salt_buf, MAYBE_UNUSED const hashinfo_t *hash_info, char *line_buf, MAYBE_UNUSED const int line_size)
+{
+  /**
+   * Writes digest, separator and salt into line_buf and returns the number
+   * of characters produced: 80 for the digest (10 words, 8 characters apart,
+   * converted by u32_to_hex), 1 for the separator, plus whatever
+   * generic_salt_encode reports for the salt.
+   */
+
+  const u32 *digest = (const u32 *) digest_buf;
+
+  // convert from a private copy: the original buffer must stay untouched,
+  // otherwise the sorting that relies on it would be destroyed
+
+  u32 tmp[10];
+
+  for (int i = 0; i < 10; i++)
+  {
+    tmp[i] = digest[i];
+  }
+
+  u8 *out_buf = (u8 *) line_buf;
+
+  int out_len = 0;
+
+  for (int i = 0; i < 10; i++)
+  {
+    u32_to_hex (tmp[i], out_buf + out_len);
+
+    out_len += 8;
+  }
+
+  out_buf[out_len] = hashconfig->separator;
+  out_len++;
+
+  u8 *salt_out = out_buf + out_len;
+
+  const int salt_chars = generic_salt_encode (hashconfig, (const u8 *) salt->salt_buf, (const int) salt->salt_len, salt_out);
+
+  out_len += salt_chars;
+
+  return out_len;
+}
+
+void module_init (module_ctx_t *module_ctx)
+{
+  module_ctx->module_context_size             = MODULE_CONTEXT_SIZE_CURRENT;
+  module_ctx->module_interface_version        = MODULE_INTERFACE_VERSION_CURRENT;
+  // hook table: non-default entries point at functions defined in this file; MODULE_DEFAULT presumably selects the core's default behaviour (confirm)
+  module_ctx->module_attack_exec              = module_attack_exec;
+  module_ctx->module_benchmark_esalt          = MODULE_DEFAULT;
+  module_ctx->module_benchmark_hook_salt      = MODULE_DEFAULT;
+  module_ctx->module_benchmark_mask           = MODULE_DEFAULT;
+  module_ctx->module_benchmark_charset        = MODULE_DEFAULT;
+  module_ctx->module_benchmark_salt           = MODULE_DEFAULT;
+  module_ctx->module_build_plain_postprocess  = MODULE_DEFAULT;
+  module_ctx->module_deep_comp_kernel         = MODULE_DEFAULT;
+  module_ctx->module_deprecated_notice        = MODULE_DEFAULT;
+  module_ctx->module_dgst_pos0                = module_dgst_pos0;
+  module_ctx->module_dgst_pos1                = module_dgst_pos1;
+  module_ctx->module_dgst_pos2                = module_dgst_pos2;
+  module_ctx->module_dgst_pos3                = module_dgst_pos3;
+  module_ctx->module_dgst_size                = module_dgst_size;
+  module_ctx->module_dictstat_disable         = MODULE_DEFAULT;
+  module_ctx->module_esalt_size               = MODULE_DEFAULT;
+  module_ctx->module_extra_buffer_size        = MODULE_DEFAULT;
+  module_ctx->module_extra_tmp_size           = MODULE_DEFAULT;
+  module_ctx->module_extra_tuningdb_block     = MODULE_DEFAULT;
+  module_ctx->module_forced_outfile_format    = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_count        = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_parse        = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_save         = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_postprocess  = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_potfile      = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_zero_hash    = MODULE_DEFAULT;
+  module_ctx->module_hash_decode              = module_hash_decode;
+  module_ctx->module_hash_encode_status       = MODULE_DEFAULT;
+  module_ctx->module_hash_encode_potfile      = MODULE_DEFAULT;
+  module_ctx->module_hash_encode              = module_hash_encode;
+  module_ctx->module_hash_init_selftest       = MODULE_DEFAULT;
+  module_ctx->module_hash_mode                = MODULE_DEFAULT;
+  module_ctx->module_hash_category            = module_hash_category;
+  module_ctx->module_hash_name                = module_hash_name;
+  module_ctx->module_hashes_count_min         = MODULE_DEFAULT;
+  module_ctx->module_hashes_count_max         = MODULE_DEFAULT;
+  module_ctx->module_hlfmt_disable            = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_size    = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_init    = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_term    = MODULE_DEFAULT;
+  module_ctx->module_hook12                   = MODULE_DEFAULT;
+  module_ctx->module_hook23                   = MODULE_DEFAULT;
+  module_ctx->module_hook_salt_size           = MODULE_DEFAULT;
+  module_ctx->module_hook_size                = MODULE_DEFAULT;
+  module_ctx->module_jit_build_options        = MODULE_DEFAULT;
+  module_ctx->module_jit_cache_disable        = MODULE_DEFAULT;
+  module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
+  module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
+  module_ctx->module_kern_type                = module_kern_type;
+  module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
+  module_ctx->module_opti_type                = module_opti_type;
+  module_ctx->module_opts_type                = module_opts_type;
+  module_ctx->module_outfile_check_disable    = MODULE_DEFAULT;
+  module_ctx->module_outfile_check_nocomp     = MODULE_DEFAULT;
+  module_ctx->module_potfile_custom_check     = MODULE_DEFAULT;
+  module_ctx->module_potfile_disable          = MODULE_DEFAULT;
+  module_ctx->module_potfile_keep_all_hashes  = MODULE_DEFAULT;
+  module_ctx->module_pwdump_column            = MODULE_DEFAULT;
+  module_ctx->module_pw_max                   = MODULE_DEFAULT;
+  module_ctx->module_pw_min                   = MODULE_DEFAULT;
+  module_ctx->module_salt_max                 = MODULE_DEFAULT;
+  module_ctx->module_salt_min                 = MODULE_DEFAULT;
+  module_ctx->module_salt_type                = module_salt_type;
+  module_ctx->module_separator                = MODULE_DEFAULT;
+  module_ctx->module_st_hash                  = module_st_hash;
+  module_ctx->module_st_pass                  = module_st_pass;
+  module_ctx->module_tmp_size                 = MODULE_DEFAULT;
+  module_ctx->module_unstable_warning         = MODULE_DEFAULT;
+  module_ctx->module_warmup_disable           = MODULE_DEFAULT;
+}
diff --git a/src/modules/module_33660.c b/src/modules/module_33660.c
new file mode 100644
index 000000000..1c0dfbc1a
--- /dev/null
+++ b/src/modules/module_33660.c
@@ -0,0 +1,223 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#include "common.h"
+#include "types.h"
+#include "modules.h"
+#include "bitops.h"
+#include "convert.h"
+#include "shared.h"
+
+static const u32   ATTACK_EXEC    = ATTACK_EXEC_INSIDE_KERNEL;
+static const u32   DGST_POS0      = 0;
+static const u32   DGST_POS1      = 1;
+static const u32   DGST_POS2      = 2;
+static const u32   DGST_POS3      = 3;
+static const u32   DGST_SIZE      = DGST_SIZE_4_10;
+static const u32   HASH_CATEGORY  = HASH_CATEGORY_RAW_HASH_AUTHENTICATED;
+static const char *HASH_NAME      = "HMAC-RIPEMD320 (key = $salt)";
+static const u64   KERN_TYPE      = 33660;
+static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_NOT_ITERATED;
+static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
+                                  | OPTS_TYPE_PT_GENERATE_LE
+                                  | OPTS_TYPE_PT_ADD80
+                                  | OPTS_TYPE_PT_ADDBITS14;
+static const u32   SALT_TYPE      = SALT_TYPE_GENERIC;
+static const char *ST_PASS        = "hashcat";
+static const char *ST_HASH        = "345136b13b3a6e52901e2a414efa0cf5fca2fecf8b03279656d3b0f42c30df3006c5ad186494996b:2436077107013929602";
+
+u32         module_attack_exec    (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC;     }
+u32         module_dgst_pos0      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0;       }
+u32         module_dgst_pos1      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS1;       }
+u32         module_dgst_pos2      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS2;       }
+u32         module_dgst_pos3      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS3;       }
+u32         module_dgst_size      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_SIZE;       }
+u32         module_hash_category  (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_CATEGORY;   }
+const char *module_hash_name      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_NAME;       }
+u64         module_kern_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return KERN_TYPE;       }
+u32         module_opti_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTI_TYPE;       }
+u64         module_opts_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTS_TYPE;       }
+u32         module_salt_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return SALT_TYPE;       }
+const char *module_st_hash        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH;         }
+const char *module_st_pass        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS;         }
+
+int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
+{
+  u32 *digest = (u32 *) digest_buf;
+
+  hc_token_t token; // describes the expected line layout: <digest hex><separator><salt>
+
+  memset (&token, 0, sizeof (hc_token_t));
+
+  token.token_cnt  = 2; // token 0 = digest, token 1 = salt
+
+  token.sep[0]     = hashconfig->separator;
+  token.len[0]     = 80; // 10 digest words below, 8 hex characters each
+  token.attr[0]    = TOKEN_ATTR_FIXED_LENGTH
+                   | TOKEN_ATTR_VERIFY_HEX;
+
+  token.len_min[1] = SALT_MIN;
+  token.len_max[1] = SALT_MAX;
+  token.attr[1]    = TOKEN_ATTR_VERIFY_LENGTH;
+
+  if (hashconfig->opts_type & OPTS_TYPE_ST_HEX) // flag set: salt length limits double and the salt token must verify as hex
+  {
+    token.len_min[1] *= 2;
+    token.len_max[1] *= 2;
+
+    token.attr[1] |= TOKEN_ATTR_VERIFY_HEX;
+  }
+
+  const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token);
+
+  if (rc_tokenizer != PARSER_OK) return (rc_tokenizer); // hand the tokenizer's error code back unchanged
+
+  const u8 *hash_pos = token.buf[0];
+
+  digest[0] = hex_to_u32 (hash_pos +  0); // one u32 per 8-character stride of the digest token
+  digest[1] = hex_to_u32 (hash_pos +  8);
+  digest[2] = hex_to_u32 (hash_pos + 16);
+  digest[3] = hex_to_u32 (hash_pos + 24);
+  digest[4] = hex_to_u32 (hash_pos + 32);
+  digest[5] = hex_to_u32 (hash_pos + 40);
+  digest[6] = hex_to_u32 (hash_pos + 48);
+  digest[7] = hex_to_u32 (hash_pos + 56);
+  digest[8] = hex_to_u32 (hash_pos + 64);
+  digest[9] = hex_to_u32 (hash_pos + 72);
+
+  const u8 *salt_pos = token.buf[1];
+  const int salt_len = token.len[1];
+
+  const bool parse_rc = generic_salt_decode (hashconfig, salt_pos, salt_len, (u8 *) salt->salt_buf, (int *) &salt->salt_len); // fills salt->salt_buf and salt->salt_len
+
+  if (parse_rc == false) return (PARSER_SALT_LENGTH); // any salt decode failure is reported as a salt length error
+
+  return (PARSER_OK);
+}
+
+int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const void *digest_buf, MAYBE_UNUSED const salt_t *salt, MAYBE_UNUSED const void *esalt_buf, MAYBE_UNUSED const void *hook_salt_buf, MAYBE_UNUSED const hashinfo_t *hash_info, char *line_buf, MAYBE_UNUSED const int line_size)
+{
+  const u32 *digest = (const u32 *) digest_buf;
+
+  // digest_buf must stay untouched (changing it would destroy the sorting),
+  // so the ten digest words are copied into a local buffer before encoding
+
+  u32 tmp[10];
+
+  tmp[0] = digest[0];
+  tmp[1] = digest[1];
+  tmp[2] = digest[2];
+  tmp[3] = digest[3];
+  tmp[4] = digest[4];
+  tmp[5] = digest[5];
+  tmp[6] = digest[6];
+  tmp[7] = digest[7];
+  tmp[8] = digest[8];
+  tmp[9] = digest[9];
+
+  u8 *out_buf = (u8 *) line_buf;
+
+  int out_len = 0; // write offset into out_buf, doubles as the running output length
+
+  u32_to_hex (tmp[0], out_buf + out_len); out_len += 8; // each word advances the offset by 8 characters
+  u32_to_hex (tmp[1], out_buf + out_len); out_len += 8;
+  u32_to_hex (tmp[2], out_buf + out_len); out_len += 8;
+  u32_to_hex (tmp[3], out_buf + out_len); out_len += 8;
+  u32_to_hex (tmp[4], out_buf + out_len); out_len += 8;
+  u32_to_hex (tmp[5], out_buf + out_len); out_len += 8;
+  u32_to_hex (tmp[6], out_buf + out_len); out_len += 8;
+  u32_to_hex (tmp[7], out_buf + out_len); out_len += 8;
+  u32_to_hex (tmp[8], out_buf + out_len); out_len += 8;
+  u32_to_hex (tmp[9], out_buf + out_len); out_len += 8;
+
+  out_buf[out_len] = hashconfig->separator; // digest and salt are joined by the configured separator
+
+  out_len += 1;
+
+  out_len += generic_salt_encode (hashconfig, (const u8 *) salt->salt_buf, (const int) salt->salt_len, out_buf + out_len);
+
+  return out_len; // 80 digest characters + 1 separator + the value returned by generic_salt_encode
+}
+
+void module_init (module_ctx_t *module_ctx)
+{
+  module_ctx->module_context_size             = MODULE_CONTEXT_SIZE_CURRENT;
+  module_ctx->module_interface_version        = MODULE_INTERFACE_VERSION_CURRENT;
+
+  module_ctx->module_attack_exec              = module_attack_exec;
+  module_ctx->module_benchmark_esalt          = MODULE_DEFAULT;
+  module_ctx->module_benchmark_hook_salt      = MODULE_DEFAULT;
+  module_ctx->module_benchmark_mask           = MODULE_DEFAULT;
+  module_ctx->module_benchmark_charset        = MODULE_DEFAULT;
+  module_ctx->module_benchmark_salt           = MODULE_DEFAULT;
+  module_ctx->module_build_plain_postprocess  = MODULE_DEFAULT;
+  module_ctx->module_deep_comp_kernel         = MODULE_DEFAULT;
+  module_ctx->module_deprecated_notice        = MODULE_DEFAULT;
+  module_ctx->module_dgst_pos0                = module_dgst_pos0;
+  module_ctx->module_dgst_pos1                = module_dgst_pos1;
+  module_ctx->module_dgst_pos2                = module_dgst_pos2;
+  module_ctx->module_dgst_pos3                = module_dgst_pos3;
+  module_ctx->module_dgst_size                = module_dgst_size;
+  module_ctx->module_dictstat_disable         = MODULE_DEFAULT;
+  module_ctx->module_esalt_size               = MODULE_DEFAULT;
+  module_ctx->module_extra_buffer_size        = MODULE_DEFAULT;
+  module_ctx->module_extra_tmp_size           = MODULE_DEFAULT;
+  module_ctx->module_extra_tuningdb_block     = MODULE_DEFAULT;
+  module_ctx->module_forced_outfile_format    = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_count        = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_parse        = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_save         = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_postprocess  = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_potfile      = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_zero_hash    = MODULE_DEFAULT;
+  module_ctx->module_hash_decode              = module_hash_decode;
+  module_ctx->module_hash_encode_status       = MODULE_DEFAULT;
+  module_ctx->module_hash_encode_potfile      = MODULE_DEFAULT;
+  module_ctx->module_hash_encode              = module_hash_encode;
+  module_ctx->module_hash_init_selftest       = MODULE_DEFAULT;
+  module_ctx->module_hash_mode                = MODULE_DEFAULT;
+  module_ctx->module_hash_category            = module_hash_category;
+  module_ctx->module_hash_name                = module_hash_name;
+  module_ctx->module_hashes_count_min         = MODULE_DEFAULT;
+  module_ctx->module_hashes_count_max         = MODULE_DEFAULT;
+  module_ctx->module_hlfmt_disable            = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_size    = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_init    = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_term    = MODULE_DEFAULT;
+  module_ctx->module_hook12                   = MODULE_DEFAULT;
+  module_ctx->module_hook23                   = MODULE_DEFAULT;
+  module_ctx->module_hook_salt_size           = MODULE_DEFAULT;
+  module_ctx->module_hook_size                = MODULE_DEFAULT;
+  module_ctx->module_jit_build_options        = MODULE_DEFAULT;
+  module_ctx->module_jit_cache_disable        = MODULE_DEFAULT;
+  module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
+  module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
+  module_ctx->module_kern_type                = module_kern_type;
+  module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
+  module_ctx->module_opti_type                = module_opti_type;
+  module_ctx->module_opts_type                = module_opts_type;
+  module_ctx->module_outfile_check_disable    = MODULE_DEFAULT;
+  module_ctx->module_outfile_check_nocomp     = MODULE_DEFAULT;
+  module_ctx->module_potfile_custom_check     = MODULE_DEFAULT;
+  module_ctx->module_potfile_disable          = MODULE_DEFAULT;
+  module_ctx->module_potfile_keep_all_hashes  = MODULE_DEFAULT;
+  module_ctx->module_pwdump_column            = MODULE_DEFAULT;
+  module_ctx->module_pw_max                   = MODULE_DEFAULT;
+  module_ctx->module_pw_min                   = MODULE_DEFAULT;
+  module_ctx->module_salt_max                 = MODULE_DEFAULT;
+  module_ctx->module_salt_min                 = MODULE_DEFAULT;
+  module_ctx->module_salt_type                = module_salt_type;
+  module_ctx->module_separator                = MODULE_DEFAULT;
+  module_ctx->module_st_hash                  = module_st_hash;
+  module_ctx->module_st_pass                  = module_st_pass;
+  module_ctx->module_tmp_size                 = MODULE_DEFAULT;
+  module_ctx->module_unstable_warning         = MODULE_DEFAULT;
+  module_ctx->module_warmup_disable           = MODULE_DEFAULT;
+}
diff --git a/tools/test_modules/m33600.pm b/tools/test_modules/m33600.pm
new file mode 100644
index 000000000..2f857f8ee
--- /dev/null
+++ b/tools/test_modules/m33600.pm
@@ -0,0 +1,42 @@
+#!/usr/bin/env perl
+
+##
+## Author......: See docs/credits.txt
+## License.....: MIT
+##
+
+use strict;
+use warnings;
+
+use Crypt::Digest::RIPEMD320 qw (ripemd320_hex);
+
+sub module_constraints { [[0, 256], [-1, -1], [0, 55], [-1, -1], [-1, -1]] }
+
+sub module_generate_hash
+{
+  my $word = shift;
+
+  my $digest = ripemd320_hex ($word);
+
+  my $hash = sprintf ("%s", $digest);
+
+  return $hash;
+}
+
+sub module_verify_hash
+{
+  my $line = shift;
+
+  my ($hash, $word) = split (':', $line);
+
+  return unless defined $hash;
+  return unless defined $word;
+
+  my $word_packed = pack_if_HEX_notation ($word);
+
+  my $new_hash = module_generate_hash ($word_packed);
+
+  return ($new_hash, $word);
+}
+
+1;
diff --git a/tools/test_modules/m33650.pm b/tools/test_modules/m33650.pm
new file mode 100644
index 000000000..c4340b22f
--- /dev/null
+++ b/tools/test_modules/m33650.pm
@@ -0,0 +1,45 @@
+#!/usr/bin/env perl
+
+##
+## Author......: See docs/credits.txt
+## License.....: MIT
+##
+
+use strict;
+use warnings;
+
+use Crypt::Digest::RIPEMD320 qw (ripemd320);
+use Digest::HMAC qw (hmac_hex);
+
+sub module_constraints { [[0, 256], [0, 256], [0, 55], [0, 55], [-1, -1]] }
+
+sub module_generate_hash
+{
+  my $word = shift;
+  my $salt = shift;
+
+  my $digest = hmac_hex ($salt, $word, \&ripemd320, 64); # Digest::HMAC order is (data, key, hash function, block size): $salt is the data, $word the key
+
+  my $hash = sprintf ("%s:%s", $digest, $salt); # emitted as <digest>:<salt>
+
+  return $hash;
+}
+
+sub module_verify_hash
+{
+  my $line = shift;
+
+  my ($hash, $salt, $word) = split (':', $line);
+
+  return unless defined $hash;
+  return unless defined $salt;
+  return unless defined $word;
+
+  my $word_packed = pack_if_HEX_notation ($word);
+
+  my $new_hash = module_generate_hash ($word_packed, $salt);
+
+  return ($new_hash, $word);
+}
+
+1;
diff --git a/tools/test_modules/m33660.pm b/tools/test_modules/m33660.pm
new file mode 100644
index 000000000..3a6f1a978
--- /dev/null
+++ b/tools/test_modules/m33660.pm
@@ -0,0 +1,45 @@
+#!/usr/bin/env perl
+
+##
+## Author......: See docs/credits.txt
+## License.....: MIT
+##
+
+use strict;
+use warnings;
+
+use Crypt::Digest::RIPEMD320 qw (ripemd320);
+use Digest::HMAC qw (hmac_hex);
+
+sub module_constraints { [[0, 256], [0, 256], [0, 55], [0, 55], [-1, -1]] }
+
+sub module_generate_hash
+{
+  my $word = shift;
+  my $salt = shift;
+
+  my $digest = hmac_hex ($word, $salt, \&ripemd320, 64); # Digest::HMAC order is (data, key, hash function, block size): $word is the data, $salt the key
+
+  my $hash = sprintf ("%s:%s", $digest, $salt); # emitted as <digest>:<salt>
+
+  return $hash;
+}
+
+sub module_verify_hash
+{
+  my $line = shift;
+
+  my ($hash, $salt, $word) = split (':', $line);
+
+  return unless defined $hash;
+  return unless defined $salt;
+  return unless defined $word;
+
+  my $word_packed = pack_if_HEX_notation ($word);
+
+  my $new_hash = module_generate_hash ($word_packed, $salt);
+
+  return ($new_hash, $word);
+}
+
+1;

From a9cbc975dcee3f5f5e6ffbea36ef156dcde4aa84 Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Fri, 30 May 2025 08:04:29 +0200
Subject: [PATCH 26/83] porting modules 33600, 33650, 33660 to v7

---
 src/modules/module_33600.c | 2 ++
 src/modules/module_33650.c | 2 ++
 src/modules/module_33660.c | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/src/modules/module_33600.c b/src/modules/module_33600.c
index 745b95b8f..8e2fa841e 100644
--- a/src/modules/module_33600.c
+++ b/src/modules/module_33600.c
@@ -126,6 +126,8 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_benchmark_mask           = MODULE_DEFAULT;
   module_ctx->module_benchmark_charset        = MODULE_DEFAULT;
   module_ctx->module_benchmark_salt           = MODULE_DEFAULT;
+  module_ctx->module_bridge_name              = MODULE_DEFAULT;
+  module_ctx->module_bridge_type              = MODULE_DEFAULT;
   module_ctx->module_build_plain_postprocess  = MODULE_DEFAULT;
   module_ctx->module_deep_comp_kernel         = MODULE_DEFAULT;
   module_ctx->module_deprecated_notice        = MODULE_DEFAULT;
diff --git a/src/modules/module_33650.c b/src/modules/module_33650.c
index a3b2d2d07..e217618cd 100644
--- a/src/modules/module_33650.c
+++ b/src/modules/module_33650.c
@@ -153,6 +153,8 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_benchmark_mask           = MODULE_DEFAULT;
   module_ctx->module_benchmark_charset        = MODULE_DEFAULT;
   module_ctx->module_benchmark_salt           = MODULE_DEFAULT;
+  module_ctx->module_bridge_name              = MODULE_DEFAULT;
+  module_ctx->module_bridge_type              = MODULE_DEFAULT;
   module_ctx->module_build_plain_postprocess  = MODULE_DEFAULT;
   module_ctx->module_deep_comp_kernel         = MODULE_DEFAULT;
   module_ctx->module_deprecated_notice        = MODULE_DEFAULT;
diff --git a/src/modules/module_33660.c b/src/modules/module_33660.c
index 1c0dfbc1a..9bbf9017b 100644
--- a/src/modules/module_33660.c
+++ b/src/modules/module_33660.c
@@ -153,6 +153,8 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_benchmark_mask           = MODULE_DEFAULT;
   module_ctx->module_benchmark_charset        = MODULE_DEFAULT;
   module_ctx->module_benchmark_salt           = MODULE_DEFAULT;
+  module_ctx->module_bridge_name              = MODULE_DEFAULT;
+  module_ctx->module_bridge_type              = MODULE_DEFAULT;
   module_ctx->module_build_plain_postprocess  = MODULE_DEFAULT;
   module_ctx->module_deep_comp_kernel         = MODULE_DEFAULT;
   module_ctx->module_deprecated_notice        = MODULE_DEFAULT;

From 38a94ce4efbb269a3708f48052dd048f7c985b8f Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Fri, 30 May 2025 08:06:50 +0200
Subject: [PATCH 27/83] porting module 33000 to v7

---
 src/modules/module_33000.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/modules/module_33000.c b/src/modules/module_33000.c
index 67a040e17..47739b4ea 100644
--- a/src/modules/module_33000.c
+++ b/src/modules/module_33000.c
@@ -231,6 +231,8 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_benchmark_mask           = MODULE_DEFAULT;
   module_ctx->module_benchmark_charset        = MODULE_DEFAULT;
   module_ctx->module_benchmark_salt           = MODULE_DEFAULT;
+  module_ctx->module_bridge_name              = MODULE_DEFAULT;
+  module_ctx->module_bridge_type              = MODULE_DEFAULT;
   module_ctx->module_build_plain_postprocess  = MODULE_DEFAULT;
   module_ctx->module_deep_comp_kernel         = MODULE_DEFAULT;
   module_ctx->module_deprecated_notice        = MODULE_DEFAULT;

From 310e9ee79a75738c6cd4723d88f367170afed10f Mon Sep 17 00:00:00 2001
From: Chick3nman <admin@chick3nman.com>
Date: Fri, 30 May 2025 14:13:43 -0500
Subject: [PATCH 28/83] Add --total-candidates flag and functionality

---
 docs/changes.txt   |  1 +
 include/types.h    |  4 ++++
 src/hashcat.c      |  1 +
 src/main.c         | 13 +++++++++++++
 src/terminal.c     | 13 +++++++------
 src/usage.c        |  1 +
 src/user_options.c | 20 ++++++++++++++++++++
 7 files changed, 47 insertions(+), 6 deletions(-)

diff --git a/docs/changes.txt b/docs/changes.txt
index 8521d5591..c7ed132a2 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -66,6 +66,7 @@
 
 - Added new feature (-Y) that creates N virtual instances for each device in your system at the cost of N times the device memory consumption
 - Added options --benchmark-min and --benchmark-max to set a hash-mode range to be used during the benchmark
+- Added option --total-candidates to provide the total candidate count for an attack instead of the internal "--keyspace" value
 
 ##
 ## Performance
diff --git a/include/types.h b/include/types.h
index 880af23ee..f6f82c992 100644
--- a/include/types.h
+++ b/include/types.h
@@ -107,6 +107,7 @@ typedef enum event_identifier
   EVENT_BITMAP_INIT_PRE           = 0x00000011,
   EVENT_BITMAP_FINAL_OVERFLOW     = 0x00000012,
   EVENT_CALCULATED_WORDS_BASE     = 0x00000020,
+  EVENT_CALCULATED_WORDS_CNT      = 0x00000021,
   EVENT_CRACKER_FINISHED          = 0x00000030,
   EVENT_CRACKER_HASH_CRACKED      = 0x00000031,
   EVENT_CRACKER_STARTING          = 0x00000032,
@@ -666,6 +667,7 @@ typedef enum user_options_defaults
   KERNEL_LOOPS             = 0,
   KERNEL_THREADS           = 0,
   KEYSPACE                 = false,
+  TOTAL_CANDIDATES         = false,
   LEFT                     = false,
   LIMIT                    = 0,
   LOGFILE                  = true,
@@ -843,6 +845,7 @@ typedef enum user_options_map
   IDX_STATUS_TIMER              = 0xff4c,
   IDX_STDOUT_FLAG               = 0xff4d,
   IDX_STDIN_TIMEOUT_ABORT       = 0xff4e,
+  IDX_TOTAL_CANDIDATES          = 0xff58,
   IDX_TRUECRYPT_KEYFILES        = 0xff4f,
   IDX_USERNAME                  = 0xff50,
   IDX_VERACRYPT_KEYFILES        = 0xff51,
@@ -2357,6 +2360,7 @@ typedef struct user_options
   bool         increment;
   bool         keep_guessing;
   bool         keyspace;
+  bool         total_candidates;
   bool         left;
   bool         logfile;
   bool         loopback;
diff --git a/src/hashcat.c b/src/hashcat.c
index e1607450c..a691d45aa 100644
--- a/src/hashcat.c
+++ b/src/hashcat.c
@@ -131,6 +131,7 @@ static int inner2_loop (hashcat_ctx_t *hashcat_ctx)
   status_ctx->words_base = status_ctx->words_cnt / amplifier_cnt;
 
   EVENT (EVENT_CALCULATED_WORDS_BASE);
+  EVENT (EVENT_CALCULATED_WORDS_CNT);
 
   if (user_options->keyspace == true)
   {
diff --git a/src/main.c b/src/main.c
index 085ce38b4..6d21cf92f 100644
--- a/src/main.c
+++ b/src/main.c
@@ -370,10 +370,22 @@ static void main_calculated_words_base (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx,
   const user_options_t *user_options = hashcat_ctx->user_options;
 
   if (user_options->keyspace == false) return;
+  if (user_options->total_candidates == true) return;
 
   event_log_info (hashcat_ctx, "%" PRIu64 "", status_ctx->words_base);
 }
 
+static void main_calculated_words_cnt (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MAYBE_UNUSED const void *buf, MAYBE_UNUSED const size_t len)
+{
+  const status_ctx_t   *status_ctx   = hashcat_ctx->status_ctx;
+  const user_options_t *user_options = hashcat_ctx->user_options;
+
+  if (user_options->keyspace == false) return; // nothing is printed unless the keyspace flag is set ...
+  if (user_options->total_candidates == false) return; // ... and total_candidates is set as well
+
+  event_log_info (hashcat_ctx, "%" PRIu64 "", status_ctx->words_cnt); // logs words_cnt as a bare decimal number
+}
+
 static void main_potfile_remove_parse_pre (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MAYBE_UNUSED const void *buf, MAYBE_UNUSED const size_t len)
 {
   const user_options_t *user_options = hashcat_ctx->user_options;
@@ -1171,6 +1183,7 @@ static void event (const u32 id, hashcat_ctx_t *hashcat_ctx, const void *buf, co
     case EVENT_BITMAP_INIT_PRE:           main_bitmap_init_pre           (hashcat_ctx, buf, len); break;
     case EVENT_BITMAP_FINAL_OVERFLOW:     main_bitmap_final_overflow     (hashcat_ctx, buf, len); break;
     case EVENT_CALCULATED_WORDS_BASE:     main_calculated_words_base     (hashcat_ctx, buf, len); break;
+    case EVENT_CALCULATED_WORDS_CNT:      main_calculated_words_cnt      (hashcat_ctx, buf, len); break;
     case EVENT_CRACKER_FINISHED:          main_cracker_finished          (hashcat_ctx, buf, len); break;
     case EVENT_CRACKER_HASH_CRACKED:      main_cracker_hash_cracked      (hashcat_ctx, buf, len); break;
     case EVENT_CRACKER_STARTING:          main_cracker_starting          (hashcat_ctx, buf, len); break;
diff --git a/src/terminal.c b/src/terminal.c
index d87faed19..5afbf5735 100644
--- a/src/terminal.c
+++ b/src/terminal.c
@@ -28,12 +28,13 @@ void welcome_screen (hashcat_ctx_t *hashcat_ctx, const char *version_tag)
 {
   const user_options_t *user_options = hashcat_ctx->user_options;
 
-  if (user_options->quiet       == true) return;
-  if (user_options->keyspace    == true) return;
-  if (user_options->stdout_flag == true) return;
-  if (user_options->show        == true) return;
-  if (user_options->left        == true) return;
-  if (user_options->identify    == true) return;
+  if (user_options->quiet       == true)      return;
+  if (user_options->keyspace    == true)      return;
+  if (user_options->total_candidates == true) return;
+  if (user_options->stdout_flag == true)      return;
+  if (user_options->show        == true)      return;
+  if (user_options->left        == true)      return;
+  if (user_options->identify    == true)      return;
 
   if (user_options->usage > 0)
   {
diff --git a/src/usage.c b/src/usage.c
index 870c790d0..612c34c04 100644
--- a/src/usage.c
+++ b/src/usage.c
@@ -120,6 +120,7 @@ static const char *const USAGE_BIG_PRE_HASHMODES[] =
   " -s, --skip                     | Num  | Skip X words from the start                          | -s 1000000",
   " -l, --limit                    | Num  | Limit X words from the start + skipped words         | -l 1000000",
   "     --keyspace                 |      | Show keyspace base:mod values and quit               |",
+  "     --total-candidates         |      | Show total candidate count (base*mod) and quit       |",
   " -j, --rule-left                | Rule | Single rule applied to each word from left wordlist  | -j 'c'",
   " -k, --rule-right               | Rule | Single rule applied to each word from right wordlist | -k '^-'",
   " -r, --rules-file               | File | Multiple rules applied to each word from wordlists   | -r rules/best64.rule",
diff --git a/src/user_options.c b/src/user_options.c
index 2adfc7ac2..8c0adeeb1 100644
--- a/src/user_options.c
+++ b/src/user_options.c
@@ -87,6 +87,7 @@ static const struct option long_options[] =
   {"kernel-threads",            required_argument, NULL, IDX_KERNEL_THREADS},
   {"keyboard-layout-mapping",   required_argument, NULL, IDX_KEYBOARD_LAYOUT_MAPPING},
   {"keyspace",                  no_argument,       NULL, IDX_KEYSPACE},
+  {"total-candidates",          no_argument,       NULL, IDX_TOTAL_CANDIDATES},
   {"left",                      no_argument,       NULL, IDX_LEFT},
   {"limit",                     required_argument, NULL, IDX_LIMIT},
   {"logfile-disable",           no_argument,       NULL, IDX_LOGFILE_DISABLE},
@@ -234,6 +235,7 @@ int user_options_init (hashcat_ctx_t *hashcat_ctx)
   user_options->kernel_threads            = KERNEL_THREADS;
   user_options->keyboard_layout_mapping   = NULL;
   user_options->keyspace                  = KEYSPACE;
+  user_options->total_candidates          = TOTAL_CANDIDATES;
   user_options->left                      = LEFT;
   user_options->limit                     = LIMIT;
   user_options->logfile                   = LOGFILE;
@@ -430,6 +432,7 @@ int user_options_getopt (hashcat_ctx_t *hashcat_ctx, int argc, char **argv)
                                           user_options->limit_chgd                = true;                            break;
       case IDX_KEEP_GUESSING:             user_options->keep_guessing             = true;                            break;
       case IDX_KEYSPACE:                  user_options->keyspace                  = true;                            break;
+      case IDX_TOTAL_CANDIDATES:          user_options->total_candidates          = true;                            break;
       case IDX_BENCHMARK:                 user_options->benchmark                 = true;                            break;
       case IDX_BENCHMARK_ALL:             user_options->benchmark_all             = true;                            break;
       case IDX_BENCHMARK_MAX:             user_options->benchmark_max             = hc_strtoul (optarg, NULL, 10);   break;
@@ -1816,6 +1819,11 @@ void user_options_session_auto (hashcat_ctx_t *hashcat_ctx)
       user_options->session = "progress_only";
     }
 
+    if (user_options->total_candidates == true)
+    {
+      user_options->session = "candidates";
+    }
+
     if (user_options->keyspace == true)
     {
       user_options->session = "keyspace";
@@ -1881,6 +1889,7 @@ void user_options_preprocess (hashcat_ctx_t *hashcat_ctx)
 
   if (user_options->hash_info        == true
    || user_options->keyspace         == true
+   || user_options->total_candidates == true
    || user_options->speed_only       == true
    || user_options->progress_only    == true
    || user_options->identify         == true
@@ -1951,6 +1960,11 @@ void user_options_preprocess (hashcat_ctx_t *hashcat_ctx)
     user_options->speed_only = true;
   }
 
+  if (user_options->total_candidates == true)
+  {
+    user_options->quiet = true;
+  }
+
   if (user_options->keyspace == true)
   {
     user_options->quiet = true;
@@ -1961,6 +1975,11 @@ void user_options_preprocess (hashcat_ctx_t *hashcat_ctx)
     user_options->backend_vector_width = 1;
   }
 
+  if (user_options->total_candidates == true)
+  {
+    user_options->keyspace = true;
+  }
+
   if (user_options->stdout_flag == true)
   {
     user_options->force                 = true;
@@ -3303,6 +3322,7 @@ void user_options_logger (hashcat_ctx_t *hashcat_ctx)
   logfile_top_uint   (user_options->kernel_loops);
   logfile_top_uint   (user_options->kernel_threads);
   logfile_top_uint   (user_options->keyspace);
+  logfile_top_uint   (user_options->total_candidates);
   logfile_top_uint   (user_options->left);
   logfile_top_uint   (user_options->logfile);
   logfile_top_uint   (user_options->loopback);

From 5b1d73e7d7b0c61d1c36d1e5a945a2cfe98b019f Mon Sep 17 00:00:00 2001
From: Dhruv <dhruv2015@hotmail.co.uk>
Date: Mon, 23 Jun 2025 14:59:31 +0100
Subject: [PATCH 29/83] Update hashcat-assimilation-bridge-development.md

---
 docs/hashcat-assimilation-bridge-development.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/docs/hashcat-assimilation-bridge-development.md b/docs/hashcat-assimilation-bridge-development.md
index 72794c494..e57bd0b7e 100644
--- a/docs/hashcat-assimilation-bridge-development.md
+++ b/docs/hashcat-assimilation-bridge-development.md
@@ -6,12 +6,12 @@ The following section is for plugin and bridge developers. It contains low-level
 
 ## Update existing plugins
 
-In case you have written a hashcat plugin, you need to update the init function and add the following two lines:
+In case you have written a Hashcat plugin, you need to update the init function and add the following two lines:
 
 +  module_ctx->module_bridge_name = MODULE_DEFAULT;
 +  module_ctx->module_bridge_type = MODULE_DEFAULT;
 
-Existing modules on hashcat repository will be automatically updated.
+Existing modules in the Hashcat repository will be automatically updated.
 
 ## Plugin Integration and Bridge Registration
 
@@ -35,7 +35,7 @@ Hashcat loads the bridge dynamically and uses it for any declared invocation.
 
 Note that bridges only load for outside kernel, aka "slow hash" kernels. In "fast hash" kernels, such as MD5, they are ignored. In case you want to implement a "fast hash" + bridge hybrid, you can move the "fast hash" code into a new "slow hash" kernel.
 
-Here's a high-level view on how hashcat executes several key points during a password batch:
+Here's a high-level view on how Hashcat executes several key points during a password batch:
 
 ```
 ATTACK_EXEC_OUTSIDE_KERNEL:
@@ -50,7 +50,7 @@ ATTACK_EXEC_OUTSIDE_KERNEL:
     RUN_PREPARE
     ITER_REPEATS:
       RUN_LOOP
-      RUN_EXTENTED
+      RUN_EXTENDED
     COPY_BRIDGE_MATERIAL_TO_HOST
     BRIDGE_LAUNCH_LOOP
     COPY_BRIDGE_MATERIAL_TO_DEVICE
@@ -110,7 +110,7 @@ From the bridge_init() function you have access to the following generic paramet
 
 ## Virtual Backend Devices
 
-This feature is available also outside of bridges, eg in order to increase some workload on a compute device, but it was added in the first place to support bridges. The main problem is that it's possible that a bridge return 2 bridge units which may have different speeds (clocking), or an ideal batch size. The time it takes to compute a certain batch of passwords would be different, so there was a need for an asynchronous execution strategy. Hashcat supports mixed speed device types, but that typically mean "backend" devices. To solve the issue, we partition (virtualize) one physical backend device into multiple virtual backend devices (done internally by hashcat), and "link" each of the virtual backend device to a bridge unit. Due to this binding we can support bridge units of different speed. There's two flags a user can control in regard to virtual device backend:
+This feature is available also outside of bridges, eg in order to increase some workload on a compute device, but it was added in the first place to support bridges. The main problem is that it's possible that a bridge return 2 bridge units which may have different speeds (clocking), or an ideal batch size. The time it takes to compute a certain batch of passwords would be different, so there was a need for an asynchronous execution strategy. Hashcat supports mixed speed device types, but that typically mean "backend" devices. To solve the issue, we partition (virtualize) one physical backend device into multiple virtual backend devices (done internally by Hashcat), and "link" each of the virtual backend device to a bridge unit. Due to this binding we can support bridge units of different speed. There's two flags a user can control in regard to virtual device backend:
 
 * Use `-Y` to define how many virtual backend devices to create.
 * Use `-R` to bind these virtual devices to a physical backend host (new in v7).
@@ -130,7 +130,7 @@ src/bridges/bridge_scrypt_jane.mk
 
 The target output should be named like this: `bridges/bridge_scrypt_jane.so` and `bridges/bridge_scrypt_jane.dll`. Use any of the existing `.mk` files as template.
 
-When hashcat starts, it finds the plugin using this pathfinder:
+When Hashcat starts, it finds the plugin using this pathfinder:
 
 ```
   #if defined (_WIN) || defined (__CYGWIN__)

From 79709f080e7ce7a87522c5d562438ea2fccc4aa6 Mon Sep 17 00:00:00 2001
From: Dhruv <dhruv2015@hotmail.co.uk>
Date: Mon, 23 Jun 2025 15:08:22 +0100
Subject: [PATCH 30/83] Update hashcat-assimilation-bridge.md

---
 docs/hashcat-assimilation-bridge.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/hashcat-assimilation-bridge.md b/docs/hashcat-assimilation-bridge.md
index f1606f405..dd039e861 100644
--- a/docs/hashcat-assimilation-bridge.md
+++ b/docs/hashcat-assimilation-bridge.md
@@ -70,4 +70,4 @@ Depending on interface compatibility, code from other password cracking tools (e
 
 The Assimilation Bridge introduces a highly extensible mechanism to integrate custom compute resources and logic into Hashcat.
 
-For hands-on examples and developer guidance, refer to the accompanying documentation in `docs/hashcat-assimiliation-bridge-development.md` (first draft).
+For hands-on examples and developer guidance, refer to the accompanying documentation in `docs/hashcat-assimilation-bridge-development.md` (first draft).

From d23d4030d0f30049247fb6091b5b4a6de2377c7c Mon Sep 17 00:00:00 2001
From: Dhruv <dhruv2015@hotmail.co.uk>
Date: Mon, 23 Jun 2025 15:29:45 +0100
Subject: [PATCH 31/83] Update hashcat-assimilation-bridge-development.md

---
 ...hashcat-assimilation-bridge-development.md | 22 +++++++++----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/docs/hashcat-assimilation-bridge-development.md b/docs/hashcat-assimilation-bridge-development.md
index e57bd0b7e..a80689df2 100644
--- a/docs/hashcat-assimilation-bridge-development.md
+++ b/docs/hashcat-assimilation-bridge-development.md
@@ -6,12 +6,12 @@ The following section is for plugin and bridge developers. It contains low-level
 
 ## Update existing plugins
 
-In case you have written a Hashcat plugin, you need to update the init function and add the following two lines:
+In case you have written a hashcat plugin, you need to update the init function and add the following two lines:
 
 +  module_ctx->module_bridge_name = MODULE_DEFAULT;
 +  module_ctx->module_bridge_type = MODULE_DEFAULT;
 
-Existing modules on Hashcat repository will be automatically updated.
+Existing modules on hashcat repository will be automatically updated.
 
 ## Plugin Integration and Bridge Registration
 
@@ -23,7 +23,7 @@ static const u64   BRIDGE_TYPE = BRIDGE_TYPE_MATCH_TUNINGS
 static const char *BRIDGE_NAME = "scrypt_jane";
 ```
 
-* `BRIDGE_NAME` tells Hashcat which bridge to load (e.g., `bridge_scrypt_jane.so`).
+* `BRIDGE_NAME` tells hashcat which bridge to load (e.g., `bridge_scrypt_jane.so`).
 * `BRIDGE_TYPE` indicates which backend kernel functions the bridge will override:
 
   * `BRIDGE_TYPE_LAUNCH_LOOP`:   Entry point for all bridges that register to run after `RUN_LOOP`
@@ -31,11 +31,11 @@ static const char *BRIDGE_NAME = "scrypt_jane";
   * `BRIDGE_TYPE_REPLACE_LOOP`:  Same as BRIDGE_TYPE_LAUNCH_LOOP, but deactivates `RUN_LOOP`
   * `BRIDGE_TYPE_REPLACE_LOOP2`: Same as BRIDGE_TYPE_LAUNCH_LOOP2, but deactivates `RUN_LOOP2`
 
-Hashcat loads the bridge dynamically and uses it for any declared invocation.
+hashcat loads the bridge dynamically and uses it for any declared invocation.
 
 Note that bridges only load for outside kernel, aka "slow hash" kernels. In "fast hash" kernels, such as MD5, they are ignored. In case you want to implement a "fast hash" + bridge hybrid, you can move the "fast hash" code into a new "slow hash" kernel.
 
-Here's a high-level view on how Hashcat executes several key points during a password batch:
+Here's a high-level view on how hashcat executes several key points during a password batch:
 
 ```
 ATTACK_EXEC_OUTSIDE_KERNEL:
@@ -75,16 +75,16 @@ ATTACK_EXEC_OUTSIDE_KERNEL:
 - COPY_* refers to host-to-device or device-to-host copies and typically involve PCIe data transfer.
 - CALL_* are code functions executed on the host CPU. They are plugin-specific and defined in a module. They were the predecessor of bridges but are still usable.
 - SALT_* typically are optional steps which allow certain algorithms specific optimizations. For instance in Scrypt with P > 1, the V and XY buffer can be reused and allow temporary storage of result values into B. This saves memory requirement, improving parallelization
-- ITER_* is the main loop that chunks what typically is defined as "iterations" in a algorithm computation. For instance a PBKDF2 function is called with 10,000 iterations, which would take a while to compute. The time this takes could be longer than a GPU drivers watchdog allows (before it resets the compute engine.). Hashcat will divide the 10,000 into chunks of let's say 1,000 and call the same kernel 10 times
+- ITER_* is the main loop that chunks what typically is defined as "iterations" in a algorithm computation. For instance a PBKDF2 function is called with 10,000 iterations, which would take a while to compute. The time this takes could be longer than a GPU drivers watchdog allows (before it resets the compute engine.). hashcat will divide the 10,000 into chunks of let's say 1,000 and call the same kernel 10 times
 - BRIDGE_* existing bridge entry points. During the "lifetime" of a hash computation the tmps[] variable is used (algorithm specific, so defined in the specific plugin module and kernel). This variable is which we refer to as bridge material, but it's possible we add other types of variables to "material" in the future
 - ITER2/LOOP2: Optional entry points in case the algorithm consists of two types of long running (high iterated) sub-components. For instance one iteration of 10k loops sha256 followed by 100k loops of sha512, or bcrypt followed by scrypt
 
   * `BRIDGE_TYPE_LAUNCH_INIT`
   * `BRIDGE_TYPE_LAUNCH_COMP`
 
-Hashcat devs will add support on request.
+hashcat devs will add support on request.
 
-As mentioned in the BRIDGE_* entry points, it's the developer's responsibility to ensure compatibility. That typically means the handling of the `tmps` variable relevant in the `kernel_loop` and how it changes over algorithm computations lifetime. Hashcat will take care of copying the data from and to the compute backend buffers (bridge material).
+As mentioned in the BRIDGE_* entry points, it's the developer's responsibility to ensure compatibility. That typically means the handling of the `tmps` variable relevant in the `kernel_loop` and how it changes over algorithm computations lifetime. hashcat will take care of copying the data from and to the compute backend buffers (bridge material).
 
 But the bridge developer must ensure data transformation compatibility. For instance, if we replace the loop section in SCRYPT (8900), the long running part is the smix() activity. But SCRYPT implements the PBKDF2 handling in both init and comp kernels, preparing the values in B[] after the init kernel, and expecting modified values in B[] before running comp kernel. If you want to replace the smix() section with let's say FPGA code, the bridge needs to understand the structure of the tmps[] variable. In this case tmps[] just reflect SCRYPT B[], making this simple, but other algorithms may require more than just one large buffer array. That means the structure itself (datatypes), but also the amount of workitems, because there's almost always more than one workitem (to reduce overhead times).
 
@@ -95,7 +95,7 @@ There's some more BRIDGE PARAMETERs that you should know:
 
 ## How Bridges Work
 
-When Hashcat starts with a plugin that specifies a bridge, it loads the bridge and invokes its initialization function. The bridge must then discover its internal compute units, called *bridge units*. Handling the units must be implemented by the bridge developer, and typically involves loading some library, init it, and retrieve some resources available, for instances loading XRT, asking how many FPGA are available. If there's two FPGA, then the bridge unit count would be two. You also need to provide some detailed information on the unit itself, for instance the name of the device, or version or your software solution if it's not a hardware.
+When hashcat starts with a plugin that specifies a bridge, it loads the bridge and invokes its initialization function. The bridge must then discover its internal compute units, called *bridge units*. Handling the units must be implemented by the bridge developer, and typically involves loading some library, init it, and retrieve some resources available, for instances loading XRT, asking how many FPGA are available. If there's two FPGA, then the bridge unit count would be two. You also need to provide some detailed information on the unit itself, for instance the name of the device, or version or your software solution if it's not a hardware.
 
 Each of these bridge unit maps to one virtual backend device, which allows asynchronous and independent parallel execution, and this were virtual backend devices become relevant. Read section about virtual backend devices for a better understanding
 
@@ -110,7 +110,7 @@ From the bridge_init() function you have access to the following generic paramet
 
 ## Virtual Backend Devices
 
-This feature is available also outside of bridges, eg in order to increase some workload on a compute device, but it was added in the first place to support bridges. The main problem is that it's possible that a bridge return 2 bridge units which may have different speeds (clocking), or an ideal batch size. The time it takes to compute a certain batch of passwords would be different, so there was a need for an asynchronous execution strategy. Hashcat supports mixed speed device types, but that typically mean "backend" devices. To solve the issue, we partition (virtualize) one physical backend device into multiple virtual backend devices (done internally by Hashcat), and "link" each of the virtual backend device to a bridge unit. Due to this binding we can support bridge units of different speed. There's two flags a user can control in regard to virtual device backend:
+This feature is available also outside of bridges, eg in order to increase some workload on a compute device, but it was added in the first place to support bridges. The main problem is that it's possible that a bridge return 2 bridge units which may have different speeds (clocking), or an ideal batch size. The time it takes to compute a certain batch of passwords would be different, so there was a need for an asynchronous execution strategy. hashcat supports mixed speed device types, but that typically mean "backend" devices. To solve the issue, we partition (virtualize) one physical backend device into multiple virtual backend devices (done internally by hashcat), and "link" each of the virtual backend device to a bridge unit. Due to this binding we can support bridge units of different speed. There's two flags a user can control in regard to virtual device backend:
 
 * Use `-Y` to define how many virtual backend devices to create.
 * Use `-R` to bind these virtual devices to a physical backend host (new in v7).
@@ -130,7 +130,7 @@ src/bridges/bridge_scrypt_jane.mk
 
 The target output should be named like this: `bridges/bridge_scrypt_jane.so` and `bridges/bridge_scrypt_jane.dll`. Use any of the existing `.mk` files as template.
 
-When Hashcat starts, it finds the plugin using this pathfinder:
+When hashcat starts, it finds the plugin using this pathfinder:
 
 ```
   #if defined (_WIN) || defined (__CYGWIN__)

From 0869e7c1bb82815728989c8b70fe9c22047789c4 Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Thu, 26 Jun 2025 21:55:07 +0200
Subject: [PATCH 32/83] change KERNEL_FQ to KERNEL_FQ KERNEL_FA statements

---
 OpenCL/m33600_a0-optimized.cl | 12 ++++++------
 OpenCL/m33600_a0-pure.cl      |  4 ++--
 OpenCL/m33600_a1-optimized.cl | 12 ++++++------
 OpenCL/m33600_a1-pure.cl      |  4 ++--
 OpenCL/m33600_a3-optimized.cl | 12 ++++++------
 OpenCL/m33600_a3-pure.cl      |  4 ++--
 OpenCL/m33650_a0-pure.cl      |  4 ++--
 OpenCL/m33650_a1-pure.cl      |  4 ++--
 OpenCL/m33650_a3-pure.cl      |  4 ++--
 OpenCL/m33660_a0-pure.cl      |  4 ++--
 OpenCL/m33660_a1-pure.cl      |  4 ++--
 OpenCL/m33660_a3-pure.cl      |  4 ++--
 12 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/OpenCL/m33600_a0-optimized.cl b/OpenCL/m33600_a0-optimized.cl
index 8e120caac..74e0b507d 100644
--- a/OpenCL/m33600_a0-optimized.cl
+++ b/OpenCL/m33600_a0-optimized.cl
@@ -22,7 +22,7 @@ DECLSPEC void ripemd320_transform_transport_vector (PRIVATE_AS const u32x *w, PR
   ripemd320_transform_vector (w + 0, w + 4, w + 8, w + 12, dgst);
 }
 
-KERNEL_FQ void m33600_m04 (KERN_ATTR_RULES ())
+KERNEL_FQ KERNEL_FA void m33600_m04 (KERN_ATTR_RULES ())
 {
   /**
    * modifier
@@ -109,15 +109,15 @@ KERNEL_FQ void m33600_m04 (KERN_ATTR_RULES ())
   }
 }
 
-KERNEL_FQ void m33600_m08 (KERN_ATTR_RULES ())
+KERNEL_FQ KERNEL_FA void m33600_m08 (KERN_ATTR_RULES ())
 {
 }
 
-KERNEL_FQ void m33600_m16 (KERN_ATTR_RULES ())
+KERNEL_FQ KERNEL_FA void m33600_m16 (KERN_ATTR_RULES ())
 {
 }
 
-KERNEL_FQ void m33600_s04 (KERN_ATTR_RULES ())
+KERNEL_FQ KERNEL_FA void m33600_s04 (KERN_ATTR_RULES ())
 {
   /**
    * modifier
@@ -216,10 +216,10 @@ KERNEL_FQ void m33600_s04 (KERN_ATTR_RULES ())
   }
 }
 
-KERNEL_FQ void m33600_s08 (KERN_ATTR_RULES ())
+KERNEL_FQ KERNEL_FA void m33600_s08 (KERN_ATTR_RULES ())
 {
 }
 
-KERNEL_FQ void m33600_s16 (KERN_ATTR_RULES ())
+KERNEL_FQ KERNEL_FA void m33600_s16 (KERN_ATTR_RULES ())
 {
 }
diff --git a/OpenCL/m33600_a0-pure.cl b/OpenCL/m33600_a0-pure.cl
index 9839edc58..d307c7cbe 100644
--- a/OpenCL/m33600_a0-pure.cl
+++ b/OpenCL/m33600_a0-pure.cl
@@ -17,7 +17,7 @@
 #include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
 #endif
 
-KERNEL_FQ void m33600_mxx (KERN_ATTR_RULES ())
+KERNEL_FQ KERNEL_FA void m33600_mxx (KERN_ATTR_RULES ())
 {
   /**
    * modifier
@@ -61,7 +61,7 @@ KERNEL_FQ void m33600_mxx (KERN_ATTR_RULES ())
   }
 }
 
-KERNEL_FQ void m33600_sxx (KERN_ATTR_RULES ())
+KERNEL_FQ KERNEL_FA void m33600_sxx (KERN_ATTR_RULES ())
 {
   /**
    * modifier
diff --git a/OpenCL/m33600_a1-optimized.cl b/OpenCL/m33600_a1-optimized.cl
index efa719980..424f04bac 100644
--- a/OpenCL/m33600_a1-optimized.cl
+++ b/OpenCL/m33600_a1-optimized.cl
@@ -20,7 +20,7 @@ DECLSPEC void ripemd320_transform_transport_vector (PRIVATE_AS const u32x *w, PR
   ripemd320_transform_vector (w + 0, w + 4, w + 8, w + 12, dgst);
 }
 
-KERNEL_FQ void m33600_m04 (KERN_ATTR_BASIC ())
+KERNEL_FQ KERNEL_FA void m33600_m04 (KERN_ATTR_BASIC ())
 {
   /**
    * modifier
@@ -165,15 +165,15 @@ KERNEL_FQ void m33600_m04 (KERN_ATTR_BASIC ())
   }
 }
 
-KERNEL_FQ void m33600_m08 (KERN_ATTR_BASIC ())
+KERNEL_FQ KERNEL_FA void m33600_m08 (KERN_ATTR_BASIC ())
 {
 }
 
-KERNEL_FQ void m33600_m16 (KERN_ATTR_BASIC ())
+KERNEL_FQ KERNEL_FA void m33600_m16 (KERN_ATTR_BASIC ())
 {
 }
 
-KERNEL_FQ void m33600_s04 (KERN_ATTR_BASIC ())
+KERNEL_FQ KERNEL_FA void m33600_s04 (KERN_ATTR_BASIC ())
 {
   /**
    * modifier
@@ -330,10 +330,10 @@ KERNEL_FQ void m33600_s04 (KERN_ATTR_BASIC ())
   }
 }
 
-KERNEL_FQ void m33600_s08 (KERN_ATTR_BASIC ())
+KERNEL_FQ KERNEL_FA void m33600_s08 (KERN_ATTR_BASIC ())
 {
 }
 
-KERNEL_FQ void m33600_s16 (KERN_ATTR_BASIC ())
+KERNEL_FQ KERNEL_FA void m33600_s16 (KERN_ATTR_BASIC ())
 {
 }
diff --git a/OpenCL/m33600_a1-pure.cl b/OpenCL/m33600_a1-pure.cl
index a97881806..f40299bca 100644
--- a/OpenCL/m33600_a1-pure.cl
+++ b/OpenCL/m33600_a1-pure.cl
@@ -15,7 +15,7 @@
 #include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
 #endif
 
-KERNEL_FQ void m33600_mxx (KERN_ATTR_BASIC ())
+KERNEL_FQ KERNEL_FA void m33600_mxx (KERN_ATTR_BASIC ())
 {
   /**
    * modifier
@@ -57,7 +57,7 @@ KERNEL_FQ void m33600_mxx (KERN_ATTR_BASIC ())
   }
 }
 
-KERNEL_FQ void m33600_sxx (KERN_ATTR_BASIC ())
+KERNEL_FQ KERNEL_FA void m33600_sxx (KERN_ATTR_BASIC ())
 {
   /**
    * modifier
diff --git a/OpenCL/m33600_a3-optimized.cl b/OpenCL/m33600_a3-optimized.cl
index f55c1a36f..efa30b23c 100644
--- a/OpenCL/m33600_a3-optimized.cl
+++ b/OpenCL/m33600_a3-optimized.cl
@@ -152,7 +152,7 @@ DECLSPEC void m33600s (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w
   }
 }
 
-KERNEL_FQ void m33600_m04 (KERN_ATTR_BASIC ())
+KERNEL_FQ KERNEL_FA void m33600_m04 (KERN_ATTR_BASIC ())
 {
   /**
    * base
@@ -201,7 +201,7 @@ KERNEL_FQ void m33600_m04 (KERN_ATTR_BASIC ())
   m33600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
 }
 
-KERNEL_FQ void m33600_m08 (KERN_ATTR_BASIC ())
+KERNEL_FQ KERNEL_FA void m33600_m08 (KERN_ATTR_BASIC ())
 {
   /**
    * base
@@ -250,7 +250,7 @@ KERNEL_FQ void m33600_m08 (KERN_ATTR_BASIC ())
   m33600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
 }
 
-KERNEL_FQ void m33600_m16 (KERN_ATTR_BASIC ())
+KERNEL_FQ KERNEL_FA void m33600_m16 (KERN_ATTR_BASIC ())
 {
   /**
    * base
@@ -299,7 +299,7 @@ KERNEL_FQ void m33600_m16 (KERN_ATTR_BASIC ())
   m33600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
 }
 
-KERNEL_FQ void m33600_s04 (KERN_ATTR_BASIC ())
+KERNEL_FQ KERNEL_FA void m33600_s04 (KERN_ATTR_BASIC ())
 {
   /**
    * base
@@ -348,7 +348,7 @@ KERNEL_FQ void m33600_s04 (KERN_ATTR_BASIC ())
   m33600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
 }
 
-KERNEL_FQ void m33600_s08 (KERN_ATTR_BASIC ())
+KERNEL_FQ KERNEL_FA void m33600_s08 (KERN_ATTR_BASIC ())
 {
   /**
    * base
@@ -397,7 +397,7 @@ KERNEL_FQ void m33600_s08 (KERN_ATTR_BASIC ())
   m33600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
 }
 
-KERNEL_FQ void m33600_s16 (KERN_ATTR_BASIC ())
+KERNEL_FQ KERNEL_FA void m33600_s16 (KERN_ATTR_BASIC ())
 {
   /**
    * base
diff --git a/OpenCL/m33600_a3-pure.cl b/OpenCL/m33600_a3-pure.cl
index ae113ddcd..80a957311 100644
--- a/OpenCL/m33600_a3-pure.cl
+++ b/OpenCL/m33600_a3-pure.cl
@@ -15,7 +15,7 @@
 #include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
 #endif
 
-KERNEL_FQ void m33600_mxx (KERN_ATTR_VECTOR ())
+KERNEL_FQ KERNEL_FA void m33600_mxx (KERN_ATTR_VECTOR ())
 {
   /**
    * modifier
@@ -70,7 +70,7 @@ KERNEL_FQ void m33600_mxx (KERN_ATTR_VECTOR ())
   }
 }
 
-KERNEL_FQ void m33600_sxx (KERN_ATTR_VECTOR ())
+KERNEL_FQ KERNEL_FA void m33600_sxx (KERN_ATTR_VECTOR ())
 {
   /**
    * modifier
diff --git a/OpenCL/m33650_a0-pure.cl b/OpenCL/m33650_a0-pure.cl
index 071cffbc0..b46dc6944 100644
--- a/OpenCL/m33650_a0-pure.cl
+++ b/OpenCL/m33650_a0-pure.cl
@@ -16,7 +16,7 @@
 #include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
 #endif
 
-KERNEL_FQ void m33650_mxx (KERN_ATTR_RULES ())
+KERNEL_FQ KERNEL_FA void m33650_mxx (KERN_ATTR_RULES ())
 {
   /**
    * modifier
@@ -69,7 +69,7 @@ KERNEL_FQ void m33650_mxx (KERN_ATTR_RULES ())
   }
 }
 
-KERNEL_FQ void m33650_sxx (KERN_ATTR_RULES ())
+KERNEL_FQ KERNEL_FA void m33650_sxx (KERN_ATTR_RULES ())
 {
   /**
    * modifier
diff --git a/OpenCL/m33650_a1-pure.cl b/OpenCL/m33650_a1-pure.cl
index 3c248fc8d..6be1df62e 100644
--- a/OpenCL/m33650_a1-pure.cl
+++ b/OpenCL/m33650_a1-pure.cl
@@ -14,7 +14,7 @@
 #include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
 #endif
 
-KERNEL_FQ void m33650_mxx (KERN_ATTR_BASIC ())
+KERNEL_FQ KERNEL_FA void m33650_mxx (KERN_ATTR_BASIC ())
 {
   /**
    * modifier
@@ -92,7 +92,7 @@ KERNEL_FQ void m33650_mxx (KERN_ATTR_BASIC ())
   }
 }
 
-KERNEL_FQ void m33650_sxx (KERN_ATTR_BASIC ())
+KERNEL_FQ KERNEL_FA void m33650_sxx (KERN_ATTR_BASIC ())
 {
   /**
    * modifier
diff --git a/OpenCL/m33650_a3-pure.cl b/OpenCL/m33650_a3-pure.cl
index 4511a6438..84e3c88a4 100644
--- a/OpenCL/m33650_a3-pure.cl
+++ b/OpenCL/m33650_a3-pure.cl
@@ -14,7 +14,7 @@
 #include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
 #endif
 
-KERNEL_FQ void m33650_mxx (KERN_ATTR_VECTOR ())
+KERNEL_FQ KERNEL_FA void m33650_mxx (KERN_ATTR_VECTOR ())
 {
   /**
    * modifier
@@ -78,7 +78,7 @@ KERNEL_FQ void m33650_mxx (KERN_ATTR_VECTOR ())
   }
 }
 
-KERNEL_FQ void m33650_sxx (KERN_ATTR_VECTOR ())
+KERNEL_FQ KERNEL_FA void m33650_sxx (KERN_ATTR_VECTOR ())
 {
   /**
    * modifier
diff --git a/OpenCL/m33660_a0-pure.cl b/OpenCL/m33660_a0-pure.cl
index fd7b04dea..694a44acc 100644
--- a/OpenCL/m33660_a0-pure.cl
+++ b/OpenCL/m33660_a0-pure.cl
@@ -16,7 +16,7 @@
 #include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
 #endif
 
-KERNEL_FQ void m33660_mxx (KERN_ATTR_RULES ())
+KERNEL_FQ KERNEL_FA void m33660_mxx (KERN_ATTR_RULES ())
 {
   /**
    * modifier
@@ -71,7 +71,7 @@ KERNEL_FQ void m33660_mxx (KERN_ATTR_RULES ())
   }
 }
 
-KERNEL_FQ void m33660_sxx (KERN_ATTR_RULES ())
+KERNEL_FQ KERNEL_FA void m33660_sxx (KERN_ATTR_RULES ())
 {
   /**
    * modifier
diff --git a/OpenCL/m33660_a1-pure.cl b/OpenCL/m33660_a1-pure.cl
index 9d070fa83..817accb0b 100644
--- a/OpenCL/m33660_a1-pure.cl
+++ b/OpenCL/m33660_a1-pure.cl
@@ -14,7 +14,7 @@
 #include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
 #endif
 
-KERNEL_FQ void m33660_mxx (KERN_ATTR_BASIC ())
+KERNEL_FQ KERNEL_FA void m33660_mxx (KERN_ATTR_BASIC ())
 {
   /**
    * modifier
@@ -94,7 +94,7 @@ KERNEL_FQ void m33660_mxx (KERN_ATTR_BASIC ())
   }
 }
 
-KERNEL_FQ void m33660_sxx (KERN_ATTR_BASIC ())
+KERNEL_FQ KERNEL_FA void m33660_sxx (KERN_ATTR_BASIC ())
 {
   /**
    * modifier
diff --git a/OpenCL/m33660_a3-pure.cl b/OpenCL/m33660_a3-pure.cl
index 10e42f73d..59e048095 100644
--- a/OpenCL/m33660_a3-pure.cl
+++ b/OpenCL/m33660_a3-pure.cl
@@ -14,7 +14,7 @@
 #include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
 #endif
 
-KERNEL_FQ void m33660_mxx (KERN_ATTR_VECTOR ())
+KERNEL_FQ KERNEL_FA void m33660_mxx (KERN_ATTR_VECTOR ())
 {
   /**
    * modifier
@@ -80,7 +80,7 @@ KERNEL_FQ void m33660_mxx (KERN_ATTR_VECTOR ())
   }
 }
 
-KERNEL_FQ void m33660_sxx (KERN_ATTR_VECTOR ())
+KERNEL_FQ KERNEL_FA void m33660_sxx (KERN_ATTR_VECTOR ())
 {
   /**
    * modifier

From 03946a05fe0e620ea1c55e39389daac5e4ddaf9b Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Thu, 26 Jun 2025 22:05:14 +0200
Subject: [PATCH 33/83] change KERNEL_FQ to KERNEL_FQ KERNEL_FA statements

---
 OpenCL/m33000_a0-pure.cl | 4 ++--
 OpenCL/m33000_a1-pure.cl | 4 ++--
 OpenCL/m33000_a3-pure.cl | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/OpenCL/m33000_a0-pure.cl b/OpenCL/m33000_a0-pure.cl
index 1c4cf0ebf..34f05ded4 100644
--- a/OpenCL/m33000_a0-pure.cl
+++ b/OpenCL/m33000_a0-pure.cl
@@ -26,7 +26,7 @@ typedef struct md5_double_salt
 
 } md5_double_salt_t;
 
-KERNEL_FQ void m33000_mxx (KERN_ATTR_RULES_ESALT (md5_double_salt_t))
+KERNEL_FQ KERNEL_FA void m33000_mxx (KERN_ATTR_RULES_ESALT (md5_double_salt_t))
 {
   /**
    * modifier
@@ -85,7 +85,7 @@ KERNEL_FQ void m33000_mxx (KERN_ATTR_RULES_ESALT (md5_double_salt_t))
   }
 }
 
-KERNEL_FQ void m33000_sxx (KERN_ATTR_RULES_ESALT (md5_double_salt_t))
+KERNEL_FQ KERNEL_FA void m33000_sxx (KERN_ATTR_RULES_ESALT (md5_double_salt_t))
 {
   /**
    * modifier
diff --git a/OpenCL/m33000_a1-pure.cl b/OpenCL/m33000_a1-pure.cl
index 59a07b57c..a75fafb6e 100644
--- a/OpenCL/m33000_a1-pure.cl
+++ b/OpenCL/m33000_a1-pure.cl
@@ -24,7 +24,7 @@ typedef struct md5_double_salt
 
 } md5_double_salt_t;
 
-KERNEL_FQ void m33000_mxx (KERN_ATTR_ESALT (md5_double_salt_t))
+KERNEL_FQ KERNEL_FA void m33000_mxx (KERN_ATTR_ESALT (md5_double_salt_t))
 {
   /**
    * modifier
@@ -79,7 +79,7 @@ KERNEL_FQ void m33000_mxx (KERN_ATTR_ESALT (md5_double_salt_t))
   }
 }
 
-KERNEL_FQ void m33000_sxx (KERN_ATTR_ESALT (md5_double_salt_t))
+KERNEL_FQ KERNEL_FA void m33000_sxx (KERN_ATTR_ESALT (md5_double_salt_t))
 {
   /**
    * modifier
diff --git a/OpenCL/m33000_a3-pure.cl b/OpenCL/m33000_a3-pure.cl
index eee052eff..6d9a55ab3 100644
--- a/OpenCL/m33000_a3-pure.cl
+++ b/OpenCL/m33000_a3-pure.cl
@@ -24,7 +24,7 @@ typedef struct md5_double_salt
 
 } md5_double_salt_t;
 
-KERNEL_FQ void m33000_mxx (KERN_ATTR_VECTOR_ESALT (md5_double_salt_t))
+KERNEL_FQ KERNEL_FA void m33000_mxx (KERN_ATTR_VECTOR_ESALT (md5_double_salt_t))
 {
   /**
    * modifier
@@ -96,7 +96,7 @@ KERNEL_FQ void m33000_mxx (KERN_ATTR_VECTOR_ESALT (md5_double_salt_t))
   }
 }
 
-KERNEL_FQ void m33000_sxx (KERN_ATTR_VECTOR_ESALT (md5_double_salt_t))
+KERNEL_FQ KERNEL_FA void m33000_sxx (KERN_ATTR_VECTOR_ESALT (md5_double_salt_t))
 {
   /**
    * modifier

From fcc284488005fd27450a5a58a3551ab8e7362f6f Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Thu, 26 Jun 2025 22:12:50 +0200
Subject: [PATCH 34/83] do not allow --benchmark together with --backend-info

---
 src/user_options.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/user_options.c b/src/user_options.c
index f70a2640f..a7d3bc508 100644
--- a/src/user_options.c
+++ b/src/user_options.c
@@ -1387,6 +1387,13 @@ int user_options_sanity (hashcat_ctx_t *hashcat_ctx)
       return -1;
     }
 
+    if (user_options->backend_info > 0)
+    {
+      event_log_error (hashcat_ctx, "Use of --backend-info is not allowed in benchmark mode.");
+
+      return -1;
+    }
+
     if (user_options->spin_damp_chgd == true)
     {
       event_log_error (hashcat_ctx, "Can't change --spin-damp in benchmark mode.");

From 0e4b6894eec2b53cddd15c4aa8b083772d79b093 Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Thu, 26 Jun 2025 22:26:31 +0200
Subject: [PATCH 35/83] module_unstable_warning only for Intel Iris Graphics on
 Apple Intel

---
 src/modules/module_10700.c |  6 +++---
 src/modules/module_22500.c | 10 ++++++++--
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/modules/module_10700.c b/src/modules/module_10700.c
index 4a7725e52..e5df902d2 100644
--- a/src/modules/module_10700.c
+++ b/src/modules/module_10700.c
@@ -109,11 +109,11 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
 {
   if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU))
   {
-    if (device_param->is_metal == false)
+    if (device_param->is_metal == true)
     {
-      if (strncmp (device_param->device_name, "Apple M", 7) == 0)
+      if (strncmp (device_param->device_name, "Intel", 5) == 0)
       {
-        // AppleM1, OpenCL, MTLCompilerService, createKernel never-end with pure kernel and newComputePipelineState failed with optimized kernel
+        // Intel Iris Graphics, Metal Version 244.303: failed to create 'm10700_loop' pipeline, timeout reached (status 49)
         return true;
       }
     }
diff --git a/src/modules/module_22500.c b/src/modules/module_22500.c
index 3bda523ad..57a70ca64 100644
--- a/src/modules/module_22500.c
+++ b/src/modules/module_22500.c
@@ -45,10 +45,16 @@ static const char *SIGNATURE_MULTIBIT = "$multibit$";
 
 bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
-  // AppleM1, OpenCL, MTLCompilerService never-end
   if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU))
   {
-    return true;
+    if (device_param->is_metal == true)
+    {
+      if (strncmp (device_param->device_name, "Intel", 5) == 0)
+      {
+        // Intel Iris Graphics, Metal Version 244.303: failed to create 'm22500_s04' pipeline, Compilation failed
+        return true;
+      }
+    }
   }
 
   return false;

From 974934dcdf168b389bf1b23cfafe2ea5949bbaf3 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Fri, 27 Jun 2025 21:52:57 +0200
Subject: [PATCH 36/83] Trying out a tweak to autotune behavior related to -u
 loop tuning.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since loop values increase by doubling in autotune, a slow hash-mode
with, for example, 1000 iterations can end up with a suboptimal -u count.
Currently, autotuning starts at 1 and doubles (2, 4, 8, ..., 512, 1024).
If the maximum is 1000, autotune stops at 512, resulting in two kernel
calls: one with 512 iterations and another with 488.

The tweak attempts to find the smallest factor that, when repeatedly
doubled, reaches the target exactly.  For 1000, this would be 125
and for 1024, it would be 1.

However, this logic doesn’t align well with how hashcat handles slow
hash iterations. For instance, PBKDF2-based plugins typically set the
iteration count to N-1, since the first iteration is handled by the
`_init` kernel. So, a plugin might set 1023 instead of 1024, and in such
cases, the logic would incorrectly assume 1023 is the minimum factor
which leads to suboptimal tuning.

To work around this, the factor-finder is executed twice: once with
the original iteration count and once with `iteration count + 1`.
The configuration that results in a lower starting point is used.

Other stuff:

- Fixed a critical bug in the autotuner

This bug was introduced a few days ago. The autotuner has the ability
to overtune the maximum allowed thread count under certain conditions.
For example, in unoptimized -a 0 cracking mode when using rules.
Several parts of the hashcat core require strict adherence to this limit,
especially when shared memory is involved.
To resolve this while retaining overtuning for compatible modes,
a new attribute `device_param->overtune_unfriendly` was introduced.
When set to true, it prevents the autotuner from modifying
`kernel_threads_max` and `kernel_accel_max`.
Four sections in `backend.c` have been updated to set this flag,
though additional areas may also require it.

- Moved the code that aligns `kernel_accel` to a multiple of the compute
  unit count into the overtune section.

- Fixed a bug in the HIP dynloader. It now reports actual error strings,
  provided the API returns them.
---
 include/shared.h |  1 +
 include/types.h  |  2 ++
 src/autotune.c   | 40 ++++++++++++++++++++++++++++++----------
 src/backend.c    |  8 ++++++++
 src/ext_hip.c    |  4 ++--
 src/shared.c     |  5 +++++
 6 files changed, 48 insertions(+), 12 deletions(-)

diff --git a/include/shared.h b/include/shared.h
index ae968281c..414f14e61 100644
--- a/include/shared.h
+++ b/include/shared.h
@@ -33,6 +33,7 @@ bool overflow_check_u64_add (const u64 a, const u64 b);
 bool overflow_check_u64_mul (const u64 a, const u64 b);
 
 bool is_power_of_2 (const u32 v);
+u32 smallest_repeat_double (const u32 v);
 
 u32 get_random_num (const u32 min, const u32 max);
 
diff --git a/include/types.h b/include/types.h
index cd3331234..4badfefa3 100644
--- a/include/types.h
+++ b/include/types.h
@@ -1375,6 +1375,8 @@ typedef struct hc_device_param
   u32     kernel_threads_min;
   u32     kernel_threads_max;
 
+  bool    overtune_unfriendly;  // whatever sets this decide we operate in a mode that is not allowing to overtune threads_max or accel_max in autotuner
+
   u64     kernel_power;
   u64     hardware_power;
 
diff --git a/src/autotune.c b/src/autotune.c
index b33ed36d6..df6d5529f 100644
--- a/src/autotune.c
+++ b/src/autotune.c
@@ -98,6 +98,7 @@ static double try_run_times (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *devi
 
 static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
 {
+  const hashes_t       *hashes       = hashcat_ctx->hashes;
   const hashconfig_t   *hashconfig   = hashcat_ctx->hashconfig;
   const backend_ctx_t  *backend_ctx  = hashcat_ctx->backend_ctx;
   const straight_ctx_t *straight_ctx = hashcat_ctx->straight_ctx;
@@ -329,7 +330,25 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
     // v7 autotuner is a lot more straight forward
 
-    for (u32 kernel_loops_test = kernel_loops_min; kernel_loops_test <= kernel_loops_max; kernel_loops_test <<= 1)
+    u32 kernel_loops_min_start = kernel_loops_min;
+
+    if (hashes && hashes->st_salts_buf)
+    {
+      u32 start = kernel_loops_max;
+
+      start = MIN (start, smallest_repeat_double (hashes->st_salts_buf->salt_iter));
+      start = MIN (start, smallest_repeat_double (hashes->st_salts_buf->salt_iter + 1));
+
+      if ((hashes->st_salts_buf->salt_iter     % 125) == 0) start = MIN (start, 125);
+      if ((hashes->st_salts_buf->salt_iter + 1 % 125) == 0) start = MIN (start, 125);
+
+      if ((start >= kernel_loops_min) && (start <= kernel_loops_max))
+      {
+        kernel_loops_min_start = start;
+      }
+    }
+
+    for (u32 kernel_loops_test = kernel_loops_min_start; kernel_loops_test <= kernel_loops_max; kernel_loops_test <<= 1)
     {
       double exec_msec = try_run_times (hashcat_ctx, device_param, kernel_accel_min, kernel_loops_test, kernel_threads_min, 2);
 
@@ -401,20 +420,21 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
       if (kernel_accel > kernel_accel_max) kernel_accel = kernel_accel_max;
     }
 
-    if (kernel_accel > 64) kernel_accel -= kernel_accel % 32;
+    // overtune section. relevant if we have strange numbers from the APIs, namely 96, 384, and such
+    // this is a dangerous action, and we set conditions somewhere in the code to disable this
 
-    if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
-    {
-      if (kernel_accel > device_param->device_processors) kernel_accel -= kernel_accel % device_param->device_processors;
-    }
-
-    // some final play, if we have strange numbers from the APIs, namely 96, 384, and such
-
-    if ((kernel_accel_min == kernel_accel_max) || (kernel_threads_min == kernel_threads_max))
+    if ((kernel_accel_min == kernel_accel_max) || (kernel_threads_min == kernel_threads_max) || (device_param->overtune_unfriendly == true))
     {
     }
     else
     {
+      if (kernel_accel > 64) kernel_accel -= kernel_accel % 32;
+
+      if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
+      {
+        if (kernel_accel > device_param->device_processors) kernel_accel -= kernel_accel % device_param->device_processors;
+      }
+
       u32 fun[2];
 
       if (is_power_of_2 (kernel_threads) == false)
diff --git a/src/backend.c b/src/backend.c
index 6bb511c5b..15e4badbb 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -10532,6 +10532,8 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
       device_param->kernel_loops_max = MIN (device_param->kernel_loops_max, 1024);  // autotune go over ...
       device_param->kernel_loops_min = MIN (device_param->kernel_loops_min, device_param->kernel_loops_max);
+
+      device_param->overtune_unfriendly = true;
     }
     #endif
 
@@ -11499,6 +11501,8 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
             {
               device_param->kernel_threads_min = fixed_local_size;
               // device_param->kernel_threads_max = fixed_local_size;
+
+              device_param->overtune_unfriendly = true;
             }
           }
         }
@@ -16014,6 +16018,8 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
         {
           device_param->kernel_threads_min = MIN (device_param->kernel_threads_min, 64);
           device_param->kernel_threads_max = MIN (device_param->kernel_threads_max, 64);
+
+          device_param->overtune_unfriendly = true;
         }
       }
     }
@@ -16032,6 +16038,8 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
       device_param->kernel_threads_min = MIN (device_param->kernel_threads_min, 64);
       device_param->kernel_threads_max = MIN (device_param->kernel_threads_max, 64);
+
+      device_param->overtune_unfriendly = true;
     }
 
     //    device_param->kernel_threads = kernel_threads;
diff --git a/src/ext_hip.c b/src/ext_hip.c
index c0145504b..decfa987c 100644
--- a/src/ext_hip.c
+++ b/src/ext_hip.c
@@ -133,8 +133,8 @@ int hip_init (void *hashcat_ctx)
   HC_LOAD_FUNC_HIP (hip, hipEventRecord,            hipEventRecord,             HIP_HIPEVENTRECORD,             HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipEventSynchronize,       hipEventSynchronize,        HIP_HIPEVENTSYNCHRONIZE,        HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipFuncGetAttribute,       hipFuncGetAttribute,        HIP_HIPFUNCGETATTRIBUTE,        HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipGetErrorName,           hipGetErrorName,            HIP_HIPGETERRORNAME,            HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipGetErrorString,         hipGetErrorString,          HIP_HIPGETERRORSTRING,          HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipGetErrorName,           hipDrvGetErrorName,         HIP_HIPGETERRORNAME,            HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipGetErrorString,         hipDrvGetErrorString,       HIP_HIPGETERRORSTRING,          HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipInit,                   hipInit,                    HIP_HIPINIT,                    HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipLaunchKernel,           hipModuleLaunchKernel,      HIP_HIPLAUNCHKERNEL,            HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipMemAlloc,               hipMalloc,                  HIP_HIPMEMALLOC,                HIP, 1);
diff --git a/src/shared.c b/src/shared.c
index 25bbf5005..a00025352 100644
--- a/src/shared.c
+++ b/src/shared.c
@@ -206,6 +206,11 @@ bool is_power_of_2 (const u32 v)
   return (v && !(v & (v - 1)));
 }
 
+u32 smallest_repeat_double (const u32 v)
+{
+  return (v == 0) ? 0 : (v / (v & -v)); // (v & -v) isolates the lowest set bit and is 0 for v == 0, so guard the division
+}
+
 u32 mydivc32 (const u32 dividend, const u32 divisor)
 {
   u32 quotient = dividend / divisor;

From f6afc8696cf1ed544243fa6f5b19bdbabc1776cf Mon Sep 17 00:00:00 2001
From: Royce Williams <royce@techsolvency.com>
Date: Fri, 27 Jun 2025 14:47:22 -0800
Subject: [PATCH 37/83] show max length in Kernel.Feature status

---
 src/terminal.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/terminal.c b/src/terminal.c
index 948b4795d..9cd362a9d 100644
--- a/src/terminal.c
+++ b/src/terminal.c
@@ -2291,11 +2291,15 @@ void status_display (hashcat_ctx_t *hashcat_ctx)
 
   if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
   {
-    event_log_info (hashcat_ctx, "Kernel.Feature...: Optimized Kernel");
+    event_log_info (hashcat_ctx,
+      "Kernel.Feature...: Optimized Kernel (max length: %u)",
+      hashconfig->pw_max);
   }
   else
   {
-    event_log_info (hashcat_ctx, "Kernel.Feature...: Pure Kernel");
+    event_log_info (hashcat_ctx,
+      "Kernel.Feature...: Pure Kernel (max length: %u)",
+      hashconfig->pw_max);
   }
 
   switch (hashcat_status->guess_mode)

From 7fff4c929aee4542ead786cc1eb0f1a30e06f3f6 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Sat, 28 Jun 2025 07:04:44 +0200
Subject: [PATCH 38/83] Fixed a division by zero bug in fast hashes caused by
 hashes->st_salts_buf->salt_iter not being used.

---
 src/autotune.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/src/autotune.c b/src/autotune.c
index df6d5529f..87637b29d 100644
--- a/src/autotune.c
+++ b/src/autotune.c
@@ -336,15 +336,20 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
     {
       u32 start = kernel_loops_max;
 
-      start = MIN (start, smallest_repeat_double (hashes->st_salts_buf->salt_iter));
-      start = MIN (start, smallest_repeat_double (hashes->st_salts_buf->salt_iter + 1));
+      const u32 salt_iter = hashes->st_salts_buf->salt_iter;
 
-      if ((hashes->st_salts_buf->salt_iter     % 125) == 0) start = MIN (start, 125);
-      if ((hashes->st_salts_buf->salt_iter + 1 % 125) == 0) start = MIN (start, 125);
-
-      if ((start >= kernel_loops_min) && (start <= kernel_loops_max))
+      if (salt_iter)
       {
-        kernel_loops_min_start = start;
+        start = MIN (start, smallest_repeat_double (hashes->st_salts_buf->salt_iter));
+        start = MIN (start, smallest_repeat_double (hashes->st_salts_buf->salt_iter + 1));
+
+        if ((hashes->st_salts_buf->salt_iter     % 125) == 0) start = MIN (start, 125);
+        if (((hashes->st_salts_buf->salt_iter + 1) % 125) == 0) start = MIN (start, 125);
+
+        if ((start >= kernel_loops_min) && (start <= kernel_loops_max))
+        {
+          kernel_loops_min_start = start;
+        }
       }
     }
 
@@ -621,3 +626,4 @@ HC_API_CALL void *thread_autotune (void *p)
 
   return NULL;
 }
+

From 45b8672270ba9d0abf751e74da567fe42c861afb Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Sat, 28 Jun 2025 10:42:47 +0200
Subject: [PATCH 39/83] add --machine-readable format to --backend-info

---
 src/terminal.c | 705 +++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 593 insertions(+), 112 deletions(-)

diff --git a/src/terminal.c b/src/terminal.c
index 948b4795d..2976bc7c2 100644
--- a/src/terminal.c
+++ b/src/terminal.c
@@ -1075,18 +1075,43 @@ void backend_info (hashcat_ctx_t *hashcat_ctx)
   const user_options_t  *user_options  = hashcat_ctx->user_options;
   const folder_config_t *folder_config = hashcat_ctx->folder_config;
 
+  if (user_options->machine_readable == true)
+  {
+    printf ("{ ");
+  }
+
   if (user_options->backend_info > 1)
   {
-    event_log_info (hashcat_ctx, "System Info:");
-    event_log_info (hashcat_ctx, "============");
-    event_log_info (hashcat_ctx, NULL);
+    if (user_options->machine_readable == false)
+    {
+      event_log_info (hashcat_ctx, "System Info:");
+      event_log_info (hashcat_ctx, "============");
+      event_log_info (hashcat_ctx, NULL);
+    }
+    else
+    {
+      printf ("\"SystemInfo\": { ");
+    }
 
     #if defined (_WIN) || defined (__CYGWIN__) || defined (__MSYS__)
     // TODO
-    event_log_info (hashcat_ctx, "OS.Name......: Windows");
-    event_log_info (hashcat_ctx, "OS.Release...: N/A");
-    event_log_info (hashcat_ctx, "HW.Platform..: N/A");
-    event_log_info (hashcat_ctx, "HW.Model.....: N/A");
+    if (user_options->machine_readable == false)
+    {
+      event_log_info (hashcat_ctx, "OS.Name......: Windows");
+      event_log_info (hashcat_ctx, "OS.Release...: N/A");
+      event_log_info (hashcat_ctx, "HW.Platform..: N/A");
+      event_log_info (hashcat_ctx, "HW.Model.....: N/A");
+    }
+    else
+    {
+      printf ("\"OS\": { ");
+      printf ("\"Name\": \"%s\", ", "Windows");
+      printf ("\"Release\": \"%s\" }, ", "N/A");
+      printf ("\"Hardware\": { ");
+      printf ("\"Platform\": \"%s\", ", "N/A");
+      printf ("\"Model\": \"%s\" } ", "N/A");
+      printf ("}, ");
+    }
     #else
 
     struct utsname utsbuf;
@@ -1123,10 +1148,23 @@ void backend_info (hashcat_ctx_t *hashcat_ctx)
       rc_uname = true;
     }
 
-    event_log_info (hashcat_ctx, "OS.Name......: %s", (rc_uname  == true) ? utsbuf.sysname : "N/A");
-    event_log_info (hashcat_ctx, "OS.Release...: %s", (rc_uname  == true) ? utsbuf.release : "N/A");
-    event_log_info (hashcat_ctx, "HW.Model.....: %s", (rc_sysctl == true) ? hw_model_buf   : "N/A");
-    event_log_info (hashcat_ctx, "HW.Platform..: %s", (rc_uname  == true) ? utsbuf.machine : "N/A");
+    if (user_options->machine_readable == false)
+    {
+      event_log_info (hashcat_ctx, "OS.Name......: %s", (rc_uname  == true) ? utsbuf.sysname : "N/A");
+      event_log_info (hashcat_ctx, "OS.Release...: %s", (rc_uname  == true) ? utsbuf.release : "N/A");
+      event_log_info (hashcat_ctx, "HW.Platform..: %s", (rc_uname  == true) ? utsbuf.machine : "N/A");
+      event_log_info (hashcat_ctx, "HW.Model.....: %s", (rc_sysctl == true) ? hw_model_buf   : "N/A");
+    }
+    else
+    {
+      printf ("\"OS\": { ");
+      printf ("\"Name\": \"%s\", ", (rc_uname  == true) ? utsbuf.sysname : "N/A");
+      printf ("\"Release\": \"%s\" }, ", (rc_uname  == true) ? utsbuf.release : "N/A");
+      printf ("\"Hardware\": { ");
+      printf ("\"Platform\": \"%s\", ", (rc_uname  == true) ? utsbuf.machine : "N/A");
+      printf ("\"Model\": \"%s\" } ", (rc_sysctl == true) ? hw_model_buf : "N/A");
+      printf ("}, ");
+    }
 
     if (rc_sysctl == true)
     {
@@ -1134,38 +1172,72 @@ void backend_info (hashcat_ctx_t *hashcat_ctx)
     }
     #endif // _WIN || __CYGWIN__ || __MSYS__
 
-    event_log_info (hashcat_ctx, NULL);
+    if (user_options->machine_readable == false)
+    {
+      event_log_info (hashcat_ctx, NULL);
 
-    event_log_info (hashcat_ctx, "Environment Info:");
-    event_log_info (hashcat_ctx, "=================");
-    event_log_info (hashcat_ctx, NULL);
+      event_log_info (hashcat_ctx, "Environment Info:");
+      event_log_info (hashcat_ctx, "=================");
+      event_log_info (hashcat_ctx, NULL);
 
-    event_log_info (hashcat_ctx, "Cur.Work.Dir.: %s", folder_config->cwd);
-    event_log_info (hashcat_ctx, "Install.Dir..: %s", folder_config->install_dir);
-    event_log_info (hashcat_ctx, "Profile.Dir..: %s", folder_config->profile_dir);
-    event_log_info (hashcat_ctx, "Cache.Dir....: %s", folder_config->cache_dir);
-    // uninitialized at this point, for instance if the user uses --session
-    //event_log_info (hashcat_ctx, "Session.Dir..: %s", folder_config->session_dir);
-    event_log_info (hashcat_ctx, "Shared.Dir...: %s", folder_config->shared_dir);
-    event_log_info (hashcat_ctx, "CL.Inc.Path..: %s", folder_config->cpath_real);
+      event_log_info (hashcat_ctx, "Cur.Work.Dir.: %s", folder_config->cwd);
+      event_log_info (hashcat_ctx, "Install.Dir..: %s", folder_config->install_dir);
+      event_log_info (hashcat_ctx, "Profile.Dir..: %s", folder_config->profile_dir);
+      event_log_info (hashcat_ctx, "Cache.Dir....: %s", folder_config->cache_dir);
+      // uninitialized at this point, for instance if the user uses --session
+      //event_log_info (hashcat_ctx, "Session.Dir..: %s", folder_config->session_dir);
+      event_log_info (hashcat_ctx, "Shared.Dir...: %s", folder_config->shared_dir);
+      event_log_info (hashcat_ctx, "CL.Inc.Path..: %s", folder_config->cpath_real);
 
-    event_log_info (hashcat_ctx, NULL);
+      event_log_info (hashcat_ctx, NULL);
+    }
+    else
+    {
+      printf ("\"EnvironmentInfo\": { ");
+      printf ("\"CurrentWorkingDirectory\": \"%s\", ", folder_config->cwd);
+      printf ("\"InstallDirectory\": \"%s\", ", folder_config->install_dir);
+      printf ("\"ProfileDirectory\": \"%s\", ", folder_config->profile_dir);
+      printf ("\"CacheDirectory\": \"%s\", ", folder_config->cache_dir);
+      printf ("\"SharedDirectory\": \"%s\", ", folder_config->shared_dir);
+      printf ("\"CLIncludePath\": \"%s\" ", folder_config->cpath_real);
+      printf ("}, ");
+    }
   }
 
   if (backend_ctx->cuda)
   {
-    event_log_info (hashcat_ctx, "CUDA Info:");
-    event_log_info (hashcat_ctx, "==========");
-    event_log_info (hashcat_ctx, NULL);
+    if (user_options->machine_readable == false)
+    {
+      event_log_info (hashcat_ctx, "CUDA Info:");
+      event_log_info (hashcat_ctx, "==========");
+      event_log_info (hashcat_ctx, NULL);
+    }
+    else
+    {
+      printf ("\"CUDAInfo\": { ");
+    }
 
     int cuda_devices_cnt    = backend_ctx->cuda_devices_cnt;
     int cuda_driver_version = backend_ctx->cuda_driver_version;
 
-    event_log_info (hashcat_ctx, "CUDA.Version.: %u.%u", cuda_driver_version / 1000, (cuda_driver_version % 100) / 10);
-    event_log_info (hashcat_ctx, NULL);
+    if (user_options->machine_readable == false)
+    {
+      event_log_info (hashcat_ctx, "CUDA.Version.: %u.%u", cuda_driver_version / 1000, (cuda_driver_version % 100) / 10);
+      event_log_info (hashcat_ctx, NULL);
+    }
+    else
+    {
+      printf ("\"Version\": \"%u.%u\", ", cuda_driver_version / 1000, (cuda_driver_version % 100) / 10);
+      printf ("\"BackendDevices\": [ ");
+    }
 
     for (int cuda_devices_idx = 0; cuda_devices_idx < cuda_devices_cnt; cuda_devices_idx++)
     {
+      if (user_options->machine_readable == true)
+      {
+        printf ("{ ");
+      }
+
       const int backend_devices_idx = backend_ctx->backend_device_from_cuda[cuda_devices_idx];
 
       const hc_device_param_t *device_param = backend_ctx->devices_param + backend_devices_idx;
@@ -1184,29 +1256,88 @@ void backend_info (hashcat_ctx_t *hashcat_ctx)
 
       if (device_param->device_id_alias_cnt)
       {
-        event_log_info (hashcat_ctx, "Backend Device ID #%02u (Alias: #%02u)", device_id + 1, device_param->device_id_alias_buf[0] + 1);
+        if (user_options->machine_readable == false)
+        {
+          event_log_info (hashcat_ctx, "Backend Device ID #%02u (Alias: #%02u)", device_id + 1, device_param->device_id_alias_buf[0] + 1);
+        }
+        else
+        {
+          printf ("\"DeviceID\": \"%02u\", ", device_id + 1);
+          printf ("\"Alias\": \"%02u\", ", device_param->device_id_alias_buf[0] + 1);
+        }
       }
       else
       {
-        event_log_info (hashcat_ctx, "Backend Device ID #%02u", device_id + 1);
+        if (user_options->machine_readable == false)
+        {
+          event_log_info (hashcat_ctx, "Backend Device ID #%02u", device_id + 1);
+        }
+        else
+        {
+          printf ("\"DeviceID\": \"%02u\", ", device_id + 1);
+        }
       }
 
-      event_log_info (hashcat_ctx, "  Name...........: %s", device_name);
-      event_log_info (hashcat_ctx, "  Processor(s)...: %u", device_processors);
-      event_log_info (hashcat_ctx, "  Clock..........: %u", device_maxclock_frequency);
-      event_log_info (hashcat_ctx, "  Memory.Total...: %" PRIu64 " MB", device_global_mem / 1024 / 1024);
-      event_log_info (hashcat_ctx, "  Memory.Free....: %" PRIu64 " MB", device_available_mem / 1024 / 1024);
-      event_log_info (hashcat_ctx, "  Local.Memory...: %" PRIu64 " KB", device_local_mem_size / 1024);
-      event_log_info (hashcat_ctx, "  PCI.Addr.BDFe..: %04x:%02x:%02x.%u", (u16) pcie_domain, pcie_bus, pcie_device, pcie_function);
-      event_log_info (hashcat_ctx, NULL);
+      if (user_options->machine_readable == false)
+      {
+        event_log_info (hashcat_ctx, "  Name...........: %s", device_name);
+        event_log_info (hashcat_ctx, "  Processor(s)...: %u", device_processors);
+        event_log_info (hashcat_ctx, "  Clock..........: %u", device_maxclock_frequency);
+        event_log_info (hashcat_ctx, "  Memory.Total...: %" PRIu64 " MB", device_global_mem / 1024 / 1024);
+        event_log_info (hashcat_ctx, "  Memory.Free....: %" PRIu64 " MB", device_available_mem / 1024 / 1024);
+        event_log_info (hashcat_ctx, "  Local.Memory...: %" PRIu64 " KB", device_local_mem_size / 1024);
+        event_log_info (hashcat_ctx, "  PCI.Addr.BDFe..: %04x:%02x:%02x.%u", (u16) pcie_domain, pcie_bus, pcie_device, pcie_function);
+        event_log_info (hashcat_ctx, NULL);
+      }
+      else
+      {
+        printf ("\"Name\": \"%s\", ", device_name);
+        printf ("\"Processor(s)\": \"%u\", ", device_processors);
+        printf ("\"Clock\": \"%u\", ", device_maxclock_frequency);
+        printf ("\"MemoryTotal\": \"%" PRIu64 " MB\", ", device_global_mem / 1024 / 1024);
+        printf ("\"MemoryFree\": \"%" PRIu64 " MB\", ", device_available_mem / 1024 / 1024);
+        printf ("\"LocalMemory\": \"%" PRIu64 " KB\", ", device_local_mem_size / 1024);
+        printf ("\"PCI.Addr.BDFe\": \"%04x:%02x:%02x.%u\" ", (u16) pcie_domain, pcie_bus, pcie_device, pcie_function);
+      }
+
+      if (user_options->machine_readable == true)
+      {
+        if ((cuda_devices_idx + 1) < cuda_devices_cnt)
+        {
+          printf ("}, ");
+        }
+        else
+        {
+          printf ("} ");
+        }
+      }
+    }
+
+    if (user_options->machine_readable == true)
+    {
+      if (backend_ctx->hip || backend_ctx->mtl || backend_ctx->ocl)
+      {
+        printf ("] }, ");
+      }
+      else
+      {
+        printf ("] } ");
+      }
     }
   }
 
   if (backend_ctx->hip)
   {
-    event_log_info (hashcat_ctx, "HIP Info:");
-    event_log_info (hashcat_ctx, "=========");
-    event_log_info (hashcat_ctx, NULL);
+    if (user_options->machine_readable == false)
+    {
+      event_log_info (hashcat_ctx, "HIP Info:");
+      event_log_info (hashcat_ctx, "=========");
+      event_log_info (hashcat_ctx, NULL);
+    }
+    else
+    {
+      printf ("\"HIPInfo\": { ");
+    }
 
     int hip_devices_cnt    = backend_ctx->hip_devices_cnt;
     int hip_runtimeVersion = backend_ctx->hip_runtimeVersion;
@@ -1217,17 +1348,41 @@ void backend_info (hashcat_ctx_t *hashcat_ctx)
       int hip_version_minor = (hip_runtimeVersion - (hip_version_major * 10000000)) / 100000;
       int hip_version_patch = (hip_runtimeVersion - (hip_version_major * 10000000) - (hip_version_minor * 100000));
 
-      event_log_info (hashcat_ctx, "HIP.Version.: %u.%u.%u", hip_version_major, hip_version_minor, hip_version_patch);
-      event_log_info (hashcat_ctx, NULL);
+      if (user_options->machine_readable == false)
+      {
+        event_log_info (hashcat_ctx, "HIP.Version.: %u.%u.%u", hip_version_major, hip_version_minor, hip_version_patch);
+        event_log_info (hashcat_ctx, NULL);
+      }
+      else
+      {
+        printf ("\"Version\": \"%u.%u.%u\", ", hip_version_major, hip_version_minor, hip_version_patch);
+      }
     }
     else
     {
-      event_log_info (hashcat_ctx, "HIP.Version.: %u.%u", hip_runtimeVersion / 100, hip_runtimeVersion % 10);
-      event_log_info (hashcat_ctx, NULL);
+      if (user_options->machine_readable == false)
+      {
+        event_log_info (hashcat_ctx, "HIP.Version.: %u.%u", hip_runtimeVersion / 100, hip_runtimeVersion % 10);
+        event_log_info (hashcat_ctx, NULL);
+      }
+      else
+      {
+        printf ("\"Version\": \"%u.%u\", ", hip_runtimeVersion / 100, hip_runtimeVersion % 10);
+      }
+    }
+
+    if (user_options->machine_readable == true)
+    {
+      printf ("\"BackendDevices\": [ ");
     }
 
     for (int hip_devices_idx = 0; hip_devices_idx < hip_devices_cnt; hip_devices_idx++)
     {
+      if (user_options->machine_readable == true)
+      {
+        printf ("{ ");
+      }
+
       const int backend_devices_idx = backend_ctx->backend_device_from_hip[hip_devices_idx];
 
       const hc_device_param_t *device_param = backend_ctx->devices_param + backend_devices_idx;
@@ -1246,40 +1401,116 @@ void backend_info (hashcat_ctx_t *hashcat_ctx)
 
       if (device_param->device_id_alias_cnt)
       {
-        event_log_info (hashcat_ctx, "Backend Device ID #%02u (Alias: #%02u)", device_id + 1, device_param->device_id_alias_buf[0] + 1);
+        if (user_options->machine_readable == false)
+        {
+          event_log_info (hashcat_ctx, "Backend Device ID #%02u (Alias: #%02u)", device_id + 1, device_param->device_id_alias_buf[0] + 1);
+        }
+        else
+        {
+          printf ("\"DeviceID\": \"%02u\", ", device_id + 1);
+          printf ("\"Alias\": \"%02u\", ", device_param->device_id_alias_buf[0] + 1);
+        }
       }
       else
       {
-        event_log_info (hashcat_ctx, "Backend Device ID #%02u", device_id + 1);
+        if (user_options->machine_readable == false)
+        {
+          event_log_info (hashcat_ctx, "Backend Device ID #%02u", device_id + 1);
+        }
+        else
+        {
+          printf ("\"DeviceID\": \"%02u\", ", device_id + 1);
+        }
       }
 
-      event_log_info (hashcat_ctx, "  Name...........: %s", device_name);
-      event_log_info (hashcat_ctx, "  Processor(s)...: %u", device_processors);
-      event_log_info (hashcat_ctx, "  Clock..........: %u", device_maxclock_frequency);
-      event_log_info (hashcat_ctx, "  Memory.Total...: %" PRIu64 " MB", device_global_mem / 1024 / 1024);
-      event_log_info (hashcat_ctx, "  Memory.Free....: %" PRIu64 " MB", device_available_mem / 1024 / 1024);
-      event_log_info (hashcat_ctx, "  Local.Memory...: %" PRIu64 " KB", device_local_mem_size / 1024);
-      event_log_info (hashcat_ctx, "  PCI.Addr.BDFe..: %04x:%02x:%02x.%u", (u16) pcie_domain, pcie_bus, pcie_device, pcie_function);
-      event_log_info (hashcat_ctx, NULL);
+      if (user_options->machine_readable == false)
+      {
+        event_log_info (hashcat_ctx, "  Name...........: %s", device_name);
+        event_log_info (hashcat_ctx, "  Processor(s)...: %u", device_processors);
+        event_log_info (hashcat_ctx, "  Clock..........: %u", device_maxclock_frequency);
+        event_log_info (hashcat_ctx, "  Memory.Total...: %" PRIu64 " MB", device_global_mem / 1024 / 1024);
+        event_log_info (hashcat_ctx, "  Memory.Free....: %" PRIu64 " MB", device_available_mem / 1024 / 1024);
+        event_log_info (hashcat_ctx, "  Local.Memory...: %" PRIu64 " KB", device_local_mem_size / 1024);
+        event_log_info (hashcat_ctx, "  PCI.Addr.BDFe..: %04x:%02x:%02x.%u", (u16) pcie_domain, pcie_bus, pcie_device, pcie_function);
+        event_log_info (hashcat_ctx, NULL);
+      }
+      else
+      {
+        printf ("\"Name\": \"%s\", ", device_name);
+        printf ("\"Processor(s)\": \"%u\", ", device_processors);
+        printf ("\"Clock\": \"%u\", ", device_maxclock_frequency);
+        printf ("\"MemoryTotal\": \"%" PRIu64 " MB\", ", device_global_mem / 1024 / 1024);
+        printf ("\"MemoryFree\": \"%" PRIu64 " MB\", ", device_available_mem / 1024 / 1024);
+        printf ("\"LocalMemory\": \"%" PRIu64 " KB\", ", device_local_mem_size / 1024);
+        printf ("\"PCI.Addr.BDFe\": \"%04x:%02x:%02x.%u\" ", (u16) pcie_domain, pcie_bus, pcie_device, pcie_function);
+      }
+
+      if (user_options->machine_readable == true)
+      {
+        if ((hip_devices_idx + 1) < hip_devices_cnt)
+        {
+          printf ("}, ");
+        }
+        else
+        {
+          printf ("} ");
+        }
+      }
+    }
+
+    if (user_options->machine_readable == true)
+    {
+      if (backend_ctx->mtl || backend_ctx->ocl)
+      {
+        printf ("] }, ");
+      }
+      else
+      {
+        printf ("] } ");
+      }
     }
   }
 
   #if defined (__APPLE__)
   if (backend_ctx->mtl)
   {
-    event_log_info (hashcat_ctx, "Metal Info:");
-    event_log_info (hashcat_ctx, "===========");
-    event_log_info (hashcat_ctx, NULL);
+    if (user_options->machine_readable == false)
+    {
+      event_log_info (hashcat_ctx, "Metal Info:");
+      event_log_info (hashcat_ctx, "===========");
+      event_log_info (hashcat_ctx, NULL);
+    }
+    else
+    {
+      printf ("\"MetalInfo\": { ");
+    }
 
     int metal_devices_cnt = backend_ctx->metal_devices_cnt;
 
     char *metal_runtimeVersionStr = backend_ctx->metal_runtimeVersionStr;
 
-    event_log_info (hashcat_ctx, "Metal.Version.: %s", metal_runtimeVersionStr);
-    event_log_info (hashcat_ctx, NULL);
+    if (user_options->machine_readable == false)
+    {
+      event_log_info (hashcat_ctx, "Metal.Version.: %s", metal_runtimeVersionStr);
+      event_log_info (hashcat_ctx, NULL);
+    }
+    else
+    {
+      printf ("\"Version\": \"%s\", ", metal_runtimeVersionStr);
+    }
+
+    if (user_options->machine_readable == true)
+    {
+      printf ("\"BackendDevices\": [ ");
+    }
 
     for (int metal_devices_idx = 0; metal_devices_idx < metal_devices_cnt; metal_devices_idx++)
     {
+      if (user_options->machine_readable == true)
+      {
+        printf ("{ ");
+      }
+
       const int backend_devices_idx = backend_ctx->backend_device_from_metal[metal_devices_idx];
 
       const hc_device_param_t *device_param = backend_ctx->devices_param + backend_devices_idx;
@@ -1310,30 +1541,111 @@ void backend_info (hashcat_ctx_t *hashcat_ctx)
 
       if (device_param->device_id_alias_cnt)
       {
-        event_log_info (hashcat_ctx, "Backend Device ID #%02u (Alias: #%02u)", device_id + 1, device_param->device_id_alias_buf[0] + 1);
+        if (user_options->machine_readable == false)
+        {
+          event_log_info (hashcat_ctx, "Backend Device ID #%02u (Alias: #%02u)", device_id + 1, device_param->device_id_alias_buf[0] + 1);
+        }
+        else
+        {
+          printf ("\"DeviceID\": \"%02u\", ", device_id + 1);
+          printf ("\"Alias\": \"%02u\", ", device_param->device_id_alias_buf[0] + 1);
+        }
       }
       else
       {
-        event_log_info (hashcat_ctx, "Backend Device ID #%02u", device_id + 1);
+        if (user_options->machine_readable == false)
+        {
+          event_log_info (hashcat_ctx, "Backend Device ID #%02u", device_id + 1);
+        }
+        else
+        {
+          printf ("\"DeviceID\": \"%02u\", ", device_id + 1);
+        }
       }
 
-      event_log_info (hashcat_ctx, "  Type...........: %s", ((opencl_device_type & CL_DEVICE_TYPE_CPU) ? "CPU" : ((opencl_device_type & CL_DEVICE_TYPE_GPU) ? "GPU" : "Accelerator")));
-      event_log_info (hashcat_ctx, "  Vendor.ID......: %u", opencl_device_vendor_id);
-      event_log_info (hashcat_ctx, "  Vendor.........: %s", opencl_device_vendor);
-      event_log_info (hashcat_ctx, "  Name...........: %s", device_name);
-      event_log_info (hashcat_ctx, "  Processor(s)...: %u", device_processors);
-      event_log_info (hashcat_ctx, "  Clock..........: N/A");
-      event_log_info (hashcat_ctx, "  Memory.Total...: %" PRIu64 " MB (limited to %" PRIu64 " MB allocatable in one block)", device_global_mem / 1024 / 1024, device_maxmem_alloc / 1024 / 1024);
-      event_log_info (hashcat_ctx, "  Memory.Free....: %" PRIu64 " MB", device_available_mem / 1024 / 1024);
-      event_log_info (hashcat_ctx, "  Local.Memory...: %" PRIu64 " KB", device_local_mem_size / 1024);
+      if (user_options->machine_readable == false)
+      {
+        event_log_info (hashcat_ctx, "  Type...........: %s", ((opencl_device_type & CL_DEVICE_TYPE_CPU) ? "CPU" : ((opencl_device_type & CL_DEVICE_TYPE_GPU) ? "GPU" : "Accelerator")));
+        event_log_info (hashcat_ctx, "  Vendor.ID......: %u", opencl_device_vendor_id);
+        event_log_info (hashcat_ctx, "  Vendor.........: %s", opencl_device_vendor);
+        event_log_info (hashcat_ctx, "  Name...........: %s", device_name);
+        event_log_info (hashcat_ctx, "  Processor(s)...: %u", device_processors);
+        event_log_info (hashcat_ctx, "  Clock..........: N/A");
+        event_log_info (hashcat_ctx, "  Memory.Total...: %" PRIu64 " MB (limited to %" PRIu64 " MB allocatable in one block)", device_global_mem / 1024 / 1024, device_maxmem_alloc / 1024 / 1024);
+        event_log_info (hashcat_ctx, "  Memory.Free....: %" PRIu64 " MB", device_available_mem / 1024 / 1024);
+        event_log_info (hashcat_ctx, "  Local.Memory...: %" PRIu64 " KB", device_local_mem_size / 1024);
+      }
+      else
+      {
+        printf ("\"Type\": \"%s\", ", ((opencl_device_type & CL_DEVICE_TYPE_CPU) ? "CPU" : ((opencl_device_type & CL_DEVICE_TYPE_GPU) ? "GPU" : "Accelerator")));
+        printf ("\"VendorID\": \"%u\", ", opencl_device_vendor_id);
+        printf ("\"Vendor\": \"%s\", ", opencl_device_vendor);
+        printf ("\"Name\": \"%s\", ", device_name);
+        printf ("\"Processor(s)\": \"%u\", ", device_processors);
+        printf ("\"Clock\": \"%s\", ", "N/A");
+        printf ("\"MemoryTotal\": \"%" PRIu64 " MB\", ", device_global_mem / 1024 / 1024);
+        printf ("\"MemoryAllocPerBlock\": \"%" PRIu64 " MB\", ", device_maxmem_alloc / 1024 / 1024);
+        printf ("\"MemoryFree\": \"%" PRIu64 " MB\", ", device_available_mem / 1024 / 1024);
+        printf ("\"LocalMemory\": \"%" PRIu64 " KB\", ", device_local_mem_size / 1024); // divided by 1024 once: KB, same unit as the text output above
+      }
 
       switch (device_physical_location)
       {
-        case MTL_DEVICE_LOCATION_BUILTIN:     event_log_info (hashcat_ctx, "  Phys.Location..: built-in"); break;
-        case MTL_DEVICE_LOCATION_SLOT:        event_log_info (hashcat_ctx, "  Phys.Location..: connected to slot %u", device_location_number); break;
-        case MTL_DEVICE_LOCATION_EXTERNAL:    event_log_info (hashcat_ctx, "  Phys.Location..: connected via an external interface (port %u)", device_location_number); break;
-        case MTL_DEVICE_LOCATION_UNSPECIFIED: event_log_info (hashcat_ctx, "  Phys.Location..: unspecified"); break;
-        default:                              event_log_info (hashcat_ctx, "  Phys.Location..: N/A"); break;
+        case MTL_DEVICE_LOCATION_BUILTIN:
+          if (user_options->machine_readable == false)
+          {
+            event_log_info (hashcat_ctx, "  Phys.Location..: built-in");
+          }
+          else
+          {
+            printf ("\"PhysicalLocation\": \"built-in\", ");
+          }
+
+          break;
+        case MTL_DEVICE_LOCATION_SLOT:
+          if (user_options->machine_readable == false)
+          {
+            event_log_info (hashcat_ctx, "  Phys.Location..: connected to slot %u", device_location_number);
+          }
+          else
+          {
+            printf ("\"PhysicalLocation\": \"connected to slot %u\", ", device_location_number);
+          }
+
+          break;
+        case MTL_DEVICE_LOCATION_EXTERNAL:
+          if (user_options->machine_readable == false)
+          {
+            event_log_info (hashcat_ctx, "  Phys.Location..: connected via an external interface (port %u)", device_location_number);
+          }
+          else
+          {
+            printf ("\"PhysicalLocation\": \"connected via an external interface (port %u)\", ", device_location_number);
+          }
+
+          break;
+        case MTL_DEVICE_LOCATION_UNSPECIFIED:
+          if (user_options->machine_readable == false)
+          {
+            event_log_info (hashcat_ctx, "  Phys.Location..: unspecified");
+          }
+          else
+          {
+            printf ("\"PhysicalLocation\": \"unspecified\", ");
+          }
+
+          break;
+        default:
+          if (user_options->machine_readable == false)
+          {
+            event_log_info (hashcat_ctx, "  Phys.Location..: N/A");
+          }
+          else
+          {
+            printf ("\"PhysicalLocation\": \"%s\", ", "N/A");
+          }
+
+          break;
       }
 
       /*
@@ -1347,28 +1659,92 @@ void backend_info (hashcat_ctx_t *hashcat_ctx)
       }
       */
 
-      event_log_info (hashcat_ctx, "  Registry.ID....: %u", device_registryID);
-
-      if (device_physical_location != MTL_DEVICE_LOCATION_BUILTIN)
+      if (user_options->machine_readable == false)
       {
-        event_log_info (hashcat_ctx, "  Max.TX.Rate....: %u MB/sec", device_max_transfer_rate);
+        event_log_info (hashcat_ctx, "  Registry.ID....: %u", device_registryID);
       }
       else
       {
-        event_log_info (hashcat_ctx, "  Max.TX.Rate....: N/A");
+        printf ("\"RegistryID\": \"%u\", ", device_registryID);
       }
 
-      event_log_info (hashcat_ctx, "  GPU.Properties.: headless %u, low-power %u, removable %u", device_is_headless, device_is_low_power, device_is_removable);
-      event_log_info (hashcat_ctx, NULL);
+      if (device_physical_location != MTL_DEVICE_LOCATION_BUILTIN)
+      {
+        if (user_options->machine_readable == false)
+        {
+          event_log_info (hashcat_ctx, "  Max.TX.Rate....: %u MB/sec", device_max_transfer_rate);
+        }
+        else
+        {
+          printf ("\"MaxTXRate\": \"%u MB/sec\", ", device_max_transfer_rate);
+        }
+      }
+      else
+      {
+        if (user_options->machine_readable == false)
+        {
+          event_log_info (hashcat_ctx, "  Max.TX.Rate....: N/A");
+        }
+        else
+        {
+          printf ("\"MaxTXRate\": \"%s\", ", "N/A");
+        }
+      }
+
+      if (user_options->machine_readable == false)
+      {
+        event_log_info (hashcat_ctx, "  GPU.Properties.: headless %u, low-power %u, removable %u", device_is_headless, device_is_low_power, device_is_removable);
+        event_log_info (hashcat_ctx, NULL);
+      }
+      else
+      {
+        printf ("\"GPUProperties\": { ");
+        printf ("\"headless\": \"%u\", ", device_is_headless);
+        printf ("\"low_power\": \"%u\", ", device_is_low_power);
+        printf ("\"removable\": \"%u\" ", device_is_removable);
+        printf ("} ");
+      }
+
+      if (user_options->machine_readable == true)
+      {
+        if ((metal_devices_idx + 1) < metal_devices_cnt)
+        {
+          printf ("}, ");
+        }
+        else
+        {
+          printf ("} ");
+        }
+      }
+    }
+
+    if (user_options->machine_readable == true)
+    {
+      if (backend_ctx->ocl)
+      {
+        printf ("] }, ");
+      }
+      else
+      {
+        printf ("] } ");
+      }
     }
   }
   #endif
 
   if (backend_ctx->ocl)
   {
-    event_log_info (hashcat_ctx, "OpenCL Info:");
-    event_log_info (hashcat_ctx, "============");
-    event_log_info (hashcat_ctx, NULL);
+    if (user_options->machine_readable == false)
+    {
+      event_log_info (hashcat_ctx, "OpenCL Info:");
+      event_log_info (hashcat_ctx, "============");
+      event_log_info (hashcat_ctx, NULL);
+    }
+    else
+    {
+      printf ("\"OpenCLInfo\": { ");
+      printf ("\"Platforms\": [ ");
+    }
 
     cl_uint   opencl_platforms_cnt         = backend_ctx->opencl_platforms_cnt;
     cl_uint  *opencl_platforms_devices_cnt = backend_ctx->opencl_platforms_devices_cnt;
@@ -1378,19 +1754,44 @@ void backend_info (hashcat_ctx_t *hashcat_ctx)
 
     for (cl_uint opencl_platforms_idx = 0; opencl_platforms_idx < opencl_platforms_cnt; opencl_platforms_idx++)
     {
+      if (user_options->machine_readable == true)
+      {
+        printf ("{ ");
+      }
+
       char     *opencl_platform_vendor       = opencl_platforms_vendor[opencl_platforms_idx];
       char     *opencl_platform_name         = opencl_platforms_name[opencl_platforms_idx];
       char     *opencl_platform_version      = opencl_platforms_version[opencl_platforms_idx];
       cl_uint   opencl_platform_devices_cnt  = opencl_platforms_devices_cnt[opencl_platforms_idx];
 
-      event_log_info (hashcat_ctx, "OpenCL Platform ID #%u", opencl_platforms_idx + 1);
-      event_log_info (hashcat_ctx, "  Vendor..: %s",  opencl_platform_vendor);
-      event_log_info (hashcat_ctx, "  Name....: %s",  opencl_platform_name);
-      event_log_info (hashcat_ctx, "  Version.: %s",  opencl_platform_version);
-      event_log_info (hashcat_ctx, NULL);
+      if (user_options->machine_readable == false)
+      {
+        event_log_info (hashcat_ctx, "OpenCL Platform ID #%u", opencl_platforms_idx + 1);
+        event_log_info (hashcat_ctx, "  Vendor..: %s",  opencl_platform_vendor);
+        event_log_info (hashcat_ctx, "  Name....: %s",  opencl_platform_name);
+        event_log_info (hashcat_ctx, "  Version.: %s",  opencl_platform_version);
+        event_log_info (hashcat_ctx, NULL);
+      }
+      else
+      {
+        printf ("\"PlatformID\": \"%u\", ", opencl_platforms_idx + 1);
+        printf ("\"Vendor\": \"%s\", ", opencl_platform_vendor);
+        printf ("\"Name\": \"%s\", ", opencl_platform_name);
+        printf ("\"Version\": \"%s\", ", opencl_platform_version);
+      }
+
+      if (user_options->machine_readable == true)
+      {
+        printf ("\"BackendDevices\": [ ");
+      }
 
       for (cl_uint opencl_platform_devices_idx = 0; opencl_platform_devices_idx < opencl_platform_devices_cnt; opencl_platform_devices_idx++)
       {
+        if (user_options->machine_readable == true)
+        {
+          printf ("{ ");
+        }
+
         const int backend_devices_idx = backend_ctx->backend_device_from_opencl_platform[opencl_platforms_idx][opencl_platform_devices_idx];
 
         const hc_device_param_t *device_param = backend_ctx->devices_param + backend_devices_idx;
@@ -1412,25 +1813,58 @@ void backend_info (hashcat_ctx_t *hashcat_ctx)
 
         if (device_param->device_id_alias_cnt)
         {
-          event_log_info (hashcat_ctx, "  Backend Device ID #%02u (Alias: #%02u)", device_id + 1, device_param->device_id_alias_buf[0] + 1);
+          if (user_options->machine_readable == false)
+          {
+            event_log_info (hashcat_ctx, "  Backend Device ID #%02u (Alias: #%02u)", device_id + 1, device_param->device_id_alias_buf[0] + 1);
+          }
+          else
+          {
+            printf ("\"DeviceID\": \"%02u\", ", device_id + 1);
+            printf ("\"Alias\": \"%02u\", ", device_param->device_id_alias_buf[0] + 1);
+          }
         }
         else
         {
-          event_log_info (hashcat_ctx, "  Backend Device ID #%02u", device_id + 1);
+          if (user_options->machine_readable == false)
+          {
+            event_log_info (hashcat_ctx, "  Backend Device ID #%02u", device_id + 1);
+          }
+          else
+          {
+            printf ("\"DeviceID\": \"%02u\", ", device_id + 1);
+          }
         }
 
-        event_log_info (hashcat_ctx, "    Type...........: %s", ((opencl_device_type & CL_DEVICE_TYPE_CPU) ? "CPU" : ((opencl_device_type & CL_DEVICE_TYPE_GPU) ? "GPU" : "Accelerator")));
-        event_log_info (hashcat_ctx, "    Vendor.ID......: %u", opencl_device_vendor_id);
-        event_log_info (hashcat_ctx, "    Vendor.........: %s", opencl_device_vendor);
-        event_log_info (hashcat_ctx, "    Name...........: %s", device_name);
-        event_log_info (hashcat_ctx, "    Version........: %s", opencl_device_version);
-        event_log_info (hashcat_ctx, "    Processor(s)...: %u", device_processors);
-        event_log_info (hashcat_ctx, "    Clock..........: %u", device_maxclock_frequency);
-        event_log_info (hashcat_ctx, "    Memory.Total...: %" PRIu64 " MB (limited to %" PRIu64 " MB allocatable in one block)", device_global_mem / 1024 / 1024, device_maxmem_alloc / 1024 / 1024);
-        event_log_info (hashcat_ctx, "    Memory.Free....: %" PRIu64 " MB", device_available_mem / 1024 / 1024);
-        event_log_info (hashcat_ctx, "    Local.Memory...: %" PRIu64 " KB", device_local_mem_size / 1024);
-        event_log_info (hashcat_ctx, "    OpenCL.Version.: %s", opencl_device_c_version);
-        event_log_info (hashcat_ctx, "    Driver.Version.: %s", opencl_driver_version);
+        if (user_options->machine_readable == false)
+        {
+          event_log_info (hashcat_ctx, "    Type...........: %s", ((opencl_device_type & CL_DEVICE_TYPE_CPU) ? "CPU" : ((opencl_device_type & CL_DEVICE_TYPE_GPU) ? "GPU" : "Accelerator")));
+          event_log_info (hashcat_ctx, "    Vendor.ID......: %u", opencl_device_vendor_id);
+          event_log_info (hashcat_ctx, "    Vendor.........: %s", opencl_device_vendor);
+          event_log_info (hashcat_ctx, "    Name...........: %s", device_name);
+          event_log_info (hashcat_ctx, "    Version........: %s", opencl_device_version);
+          event_log_info (hashcat_ctx, "    Processor(s)...: %u", device_processors);
+          event_log_info (hashcat_ctx, "    Clock..........: %u", device_maxclock_frequency);
+          event_log_info (hashcat_ctx, "    Memory.Total...: %" PRIu64 " MB (limited to %" PRIu64 " MB allocatable in one block)", device_global_mem / 1024 / 1024, device_maxmem_alloc / 1024 / 1024);
+          event_log_info (hashcat_ctx, "    Memory.Free....: %" PRIu64 " MB", device_available_mem / 1024 / 1024);
+          event_log_info (hashcat_ctx, "    Local.Memory...: %" PRIu64 " KB", device_local_mem_size / 1024);
+          event_log_info (hashcat_ctx, "    OpenCL.Version.: %s", opencl_device_c_version);
+          event_log_info (hashcat_ctx, "    Driver.Version.: %s", opencl_driver_version);
+        }
+        else
+        {
+          printf ("\"Type\": \"%s\", ", ((opencl_device_type & CL_DEVICE_TYPE_CPU) ? "CPU" : ((opencl_device_type & CL_DEVICE_TYPE_GPU) ? "GPU" : "Accelerator")));
+          printf ("\"VendorID\": \"%u\", ", opencl_device_vendor_id);
+          printf ("\"Vendor\": \"%s\", ", opencl_device_vendor);
+          printf ("\"Name\": \"%s\", ", device_name);
+          printf ("\"Processor(s)\": \"%u\", ", device_processors);
+          printf ("\"Clock\": \"%u\", ", device_maxclock_frequency);
+          printf ("\"MemoryTotal\": \"%" PRIu64 " MB\", ", device_global_mem / 1024 / 1024);
+          printf ("\"MemoryAllocPerBlock\": \"%" PRIu64 " MB\", ", device_maxmem_alloc / 1024 / 1024);
+          printf ("\"MemoryFree\": \"%" PRIu64 " MB\", ", device_available_mem / 1024 / 1024);
+          printf ("\"LocalMemory\": \"%" PRIu64 " KB\", ", device_local_mem_size / 1024); // divided by 1024 once: KB, same unit as the text output above
+          printf ("\"OpenCLVersion\": \"%s\", ", opencl_device_c_version);
+          printf ("\"DriverVersion\": \"%s\" ", opencl_driver_version); // driver version, matching Driver.Version. in the text output; no trailing comma (optional PCI key adds its own)
+        }
 
         if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
         {
@@ -1440,18 +1874,68 @@ void backend_info (hashcat_ctx_t *hashcat_ctx)
 
           if ((device_param->opencl_platform_vendor_id == VENDOR_ID_AMD) && (device_param->opencl_device_vendor_id == VENDOR_ID_AMD))
           {
-            event_log_info (hashcat_ctx, "    PCI.Addr.BDF...: %02x:%02x.%u", pcie_bus, pcie_device, pcie_function);
+            if (user_options->machine_readable == false)
+            {
+              event_log_info (hashcat_ctx, "    PCI.Addr.BDF...: %02x:%02x.%u", pcie_bus, pcie_device, pcie_function);
+            }
+            else
+            {
+              printf (", \"PCI.Addr.BDF\": \"%02x:%02x.%u\" ", pcie_bus, pcie_device, pcie_function);
+            }
           }
 
           if ((device_param->opencl_platform_vendor_id == VENDOR_ID_NV) && (device_param->opencl_device_vendor_id == VENDOR_ID_NV))
           {
-            event_log_info (hashcat_ctx, "    PCI.Addr.BDF...: %02x:%02x.%u", pcie_bus, pcie_device, pcie_function);
+            if (user_options->machine_readable == false)
+            {
+              event_log_info (hashcat_ctx, "    PCI.Addr.BDF...: %02x:%02x.%u", pcie_bus, pcie_device, pcie_function);
+            }
+            else
+            {
+              printf (", \"PCI.Addr.BDF\": \"%02x:%02x.%u\" ", pcie_bus, pcie_device, pcie_function);
+            }
           }
         }
 
-        event_log_info (hashcat_ctx, NULL);
+        if (user_options->machine_readable == false)
+        {
+          event_log_info (hashcat_ctx, NULL);
+        }
+        else
+        {
+          if ((opencl_platform_devices_idx + 1) < opencl_platform_devices_cnt)
+          {
+            printf ("}, ");
+          }
+          else
+          {
+            printf ("} ");
+          }
+        }
+      }
+
+      if (user_options->machine_readable == true)
+      {
+        if ((opencl_platforms_idx + 1) < opencl_platforms_cnt)
+        {
+          printf ("] }, ");
+        }
+        else
+        {
+          printf ("] } ");
+        }
       }
     }
+
+    if (user_options->machine_readable == true)
+    {
+      printf ("] } ");
+    }
+  }
+
+  if (user_options->machine_readable == true)
+  {
+    printf ("}");
   }
 }
 
@@ -2676,10 +3160,7 @@ void status_display (hashcat_ctx_t *hashcat_ctx)
         digests_remain,
         digests_remain_percent);
     }
-  }
 
-  if (hashcat_status->digests_cnt > 1000)
-  {
     event_log_info (hashcat_ctx,
       "Recovered/Time...: %s",
       hashcat_status->cpt);

From 92b2f996b29fd72f79b82fae18d774b4f683d379 Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Sat, 28 Jun 2025 10:45:24 +0200
Subject: [PATCH 40/83] update docs/changes.txt

---
 docs/changes.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/changes.txt b/docs/changes.txt
index d9ff56f2b..5ff9f0f63 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -125,6 +125,7 @@
 - Apple Driver: Updated requirements to use Apple OpenCL API to macOS 13.0 - use
 - Backend: Updated filename chksum format to prevent invalid cache on Apple Silicon when switching arch
 - Backend Checks: Describe workaround in error message when detecting more than 64 backend devices
+- Backend Info: Added --machine-readable format
 - Brain: Added sanity check and corresponding error message for invalid --brain-port values
 - Dependencies: Added sse2neon v1.8.0 (commit 658eeac)
 - Dependencies: Updated LZMA SDK to 24.09

From a4bcde8aeddd4579e6950a716a8e9b598ce0792e Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Sat, 28 Jun 2025 11:16:39 +0200
Subject: [PATCH 41/83] fix json format on hash_info_single_json

---
 src/terminal.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/terminal.c b/src/terminal.c
index f15a0f87d..4a9fc45cb 100644
--- a/src/terminal.c
+++ b/src/terminal.c
@@ -726,7 +726,7 @@ void hash_info_single_json (hashcat_ctx_t *hashcat_ctx, user_options_extra_t *us
       }
     }
 
-    printf ("\"password_type\": %s, ", t_pw_desc);
+    printf ("\"password_type\": \"%s\", ", t_pw_desc);
     printf ("\"password_len_min\": %u, ", t_pw_min);
     printf ("\"password_len_max\": %u, ", t_pw_max);
 

From c275c35cedd9817e237652c06af48cdab46a9a8f Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Sat, 28 Jun 2025 22:54:36 +0200
Subject: [PATCH 42/83] workaround for HIP bug and avoid the same potential
 bug on CUDA

---
 src/backend.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/src/backend.c b/src/backend.c
index 15e4badbb..fc0051dd4 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -15948,6 +15948,12 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       {
         u32 threads_per_block_with_regs = (floor) ((float) device_param->regsPerBlock / num_regs);
 
+        if (threads_per_block_with_regs == 0)
+        {
+          // prevent threads_per_block from resulting in 0 due to a bug on the runtime
+          threads_per_block_with_regs = threads_per_block;
+        }
+
         if (threads_per_block_with_regs > device_param->kernel_preferred_wgs_multiple) threads_per_block_with_regs -= threads_per_block_with_regs % device_param->kernel_preferred_wgs_multiple;
 
         threads_per_block = MIN (threads_per_block, threads_per_block_with_regs);
@@ -15967,6 +15973,14 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       {
         u32 threads_per_block_with_regs = (floor) ((float) device_param->regsPerBlock / num_regs);
 
+        if (threads_per_block_with_regs == 0)
+        {
+          // https://rocm.docs.amd.com/projects/HIP/en/docs-develop/doxygen/html/bug.html
+          // HIP-Clang always returns 0 for regsPerBlock due to a known bug
+          // prevent threads_per_block from resulting in 0, otherwise hashcat crashes
+          threads_per_block_with_regs = threads_per_block;
+        }
+
         if (threads_per_block_with_regs > device_param->kernel_preferred_wgs_multiple) threads_per_block_with_regs -= threads_per_block_with_regs % device_param->kernel_preferred_wgs_multiple;
 
         threads_per_block = MIN (threads_per_block, threads_per_block_with_regs);

From 7e2c65cc98398f1fcf4026e4374ec3e0f56dca0f Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Sun, 29 Jun 2025 13:28:31 +0200
Subject: [PATCH 43/83] Backend: Splitting backend_ctx_devices_init into
 smaller runtime-specific functions

---
 docs/changes.txt |   1 +
 src/backend.c    | 210 +++++++++++++++++++++++++++++------------------
 2 files changed, 131 insertions(+), 80 deletions(-)

diff --git a/docs/changes.txt b/docs/changes.txt
index cc75fe622..1c2f56a75 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -127,6 +127,7 @@
 - Apple Driver: Automatically enable GPU support on Apple OpenCL instead of CPU support
 - Apple Driver: Updated requirements to use Apple OpenCL API to macOS 13.0 - use
 - Backend: Updated filename chksum format to prevent invalid cache on Apple Silicon when switching arch
+- Backend: Splitting backend_ctx_devices_init into smaller runtime-specific functions
 - Backend Checks: Describe workaround in error message when detecting more than 64 backend devices
 - Brain: Added sanity check and corresponding error message for invalid --brain-port values
 - Dependencies: Added sse2neon v1.8.0 (commit 658eeac)
diff --git a/src/backend.c b/src/backend.c
index fc0051dd4..23baa8b25 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -5418,37 +5418,13 @@ void backend_ctx_destroy (hashcat_ctx_t *hashcat_ctx)
   memset (backend_ctx, 0, sizeof (backend_ctx_t));
 }
 
-int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
+int backend_ctx_devices_init_cuda (hashcat_ctx_t *hashcat_ctx, bool is_virtualized, int virtmulti, int *virthost, int *virthost_finder, int *backend_devices_idx, int *bridge_link_device, bool *need_nvml, MAYBE_UNUSED bool *need_nvapi)
 {
-  const bridge_ctx_t    *bridge_ctx    = hashcat_ctx->bridge_ctx;
-  const folder_config_t *folder_config = hashcat_ctx->folder_config;
-        backend_ctx_t   *backend_ctx   = hashcat_ctx->backend_ctx;
-        user_options_t  *user_options  = hashcat_ctx->user_options;
-
-  if (backend_ctx->enabled == false) return 0;
+  backend_ctx_t     *backend_ctx   = hashcat_ctx->backend_ctx;
+  user_options_t    *user_options  = hashcat_ctx->user_options;
 
   hc_device_param_t *devices_param = backend_ctx->devices_param;
 
-  bool need_adl           = false;
-  bool need_nvml          = false;
-  bool need_nvapi         = false;
-  bool need_sysfs_amdgpu  = false;
-  bool need_sysfs_cpu     = false;
-  bool need_iokit         = false;
-
-  int bridge_link_device = 0; // this will only count active device
-
-  int backend_devices_idx = 0; // this will not only count active devices
-
-  bool is_virtualized = ((user_options->backend_devices_virtmulti > 1) || (bridge_ctx->enabled == true)) ? true : false;
-
-  int virtmulti = (bridge_ctx->enabled == true) ? bridge_ctx->get_unit_count (bridge_ctx->platform_context) : (int) user_options->backend_devices_virtmulti;
-
-  int virthost = -1;
-  int virthost_finder = user_options->backend_devices_virthost;
-
-  // CUDA
-
   int cuda_devices_cnt    = 0;
   int cuda_devices_active = 0;
 
@@ -5463,15 +5439,15 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
     if (is_virtualized == true)
     {
-      if ((virthost == -1) && (virthost_finder <= cuda_devices_cnt))
+      if ((*virthost == -1) && (*virthost_finder <= cuda_devices_cnt))
       {
         cuda_devices_cnt = virtmulti;
 
-        virthost = virthost_finder - 1;
+        *virthost = *virthost_finder - 1;
       }
       else
       {
-        virthost_finder -= cuda_devices_cnt;
+        *virthost_finder -= cuda_devices_cnt;
 
         cuda_devices_cnt = 0;
       }
@@ -5481,17 +5457,17 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
     // device specific
 
-    for (int cuda_devices_idx = 0; cuda_devices_idx < cuda_devices_cnt; cuda_devices_idx++, backend_devices_idx++)
+    for (int cuda_devices_idx = 0; cuda_devices_idx < cuda_devices_cnt; cuda_devices_idx++, (*backend_devices_idx)++)
     {
-      const u32 device_id = backend_devices_idx;
+      const u32 device_id = *backend_devices_idx;
 
-      const u32 cuda_devices_idx_real = (is_virtualized == true) ? virthost : cuda_devices_idx;
+      const u32 cuda_devices_idx_real = (is_virtualized == true) ? *virthost : cuda_devices_idx;
 
-      hc_device_param_t *device_param = &devices_param[backend_devices_idx];
+      hc_device_param_t *device_param = &devices_param[*backend_devices_idx];
 
       device_param->device_id = device_id;
 
-      backend_ctx->backend_device_from_cuda[cuda_devices_idx] = backend_devices_idx;
+      backend_ctx->backend_device_from_cuda[cuda_devices_idx] = *backend_devices_idx;
 
       CUdevice cuda_device;
 
@@ -5791,10 +5767,10 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       if ((device_param->opencl_platform_vendor_id == VENDOR_ID_NV) && (device_param->opencl_device_vendor_id == VENDOR_ID_NV))
       {
-        need_nvml = true;
+        *need_nvml = true;
 
         #if defined (_WIN) || defined (__CYGWIN__)
-        need_nvapi = true;
+        *need_nvapi = true;
         #endif
       }
 
@@ -5906,7 +5882,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       if (device_param->skipped == false)
       {
-        device_param->bridge_link_device = bridge_link_device++;
+        device_param->bridge_link_device = (*bridge_link_device)++;
 
         cuda_devices_active++;
       }
@@ -5916,7 +5892,18 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
   backend_ctx->cuda_devices_cnt     = cuda_devices_cnt;
   backend_ctx->cuda_devices_active  = cuda_devices_active;
 
-  // HIP
+  return 0;
+}
+
+int backend_ctx_devices_init_hip (hashcat_ctx_t *hashcat_ctx, bool is_virtualized, int virtmulti, int *virthost, int *virthost_finder, int *backend_devices_idx, int *bridge_link_device, bool *need_adl, MAYBE_UNUSED bool *need_sysfs_amdgpu)
+{
+  #if defined (__linux__)
+  const folder_config_t *folder_config = hashcat_ctx->folder_config;
+  #endif
+  backend_ctx_t         *backend_ctx   = hashcat_ctx->backend_ctx;
+  user_options_t        *user_options  = hashcat_ctx->user_options;
+
+  hc_device_param_t     *devices_param = backend_ctx->devices_param;
 
   int hip_devices_cnt    = 0;
   int hip_devices_active = 0;
@@ -5932,15 +5919,15 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
     if (is_virtualized == true)
     {
-      if ((virthost == -1) && (virthost_finder <= hip_devices_cnt))
+      if ((*virthost == -1) && (*virthost_finder <= hip_devices_cnt))
       {
         hip_devices_cnt = virtmulti;
 
-        virthost = virthost_finder - 1;
+        *virthost = *virthost_finder - 1;
       }
       else
       {
-        virthost_finder -= hip_devices_cnt;
+        *virthost_finder -= hip_devices_cnt;
 
         hip_devices_cnt = 0;
       }
@@ -5950,17 +5937,17 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
     // device specific
 
-    for (int hip_devices_idx = 0; hip_devices_idx < hip_devices_cnt; hip_devices_idx++, backend_devices_idx++)
+    for (int hip_devices_idx = 0; hip_devices_idx < hip_devices_cnt; hip_devices_idx++, (*backend_devices_idx)++)
     {
-      const u32 device_id = backend_devices_idx;
+      const u32 device_id = *backend_devices_idx;
 
-      const u32 hip_devices_idx_real = (is_virtualized == true) ? virthost : hip_devices_idx;
+      const u32 hip_devices_idx_real = (is_virtualized == true) ? *virthost : hip_devices_idx;
 
-      hc_device_param_t *device_param = &devices_param[backend_devices_idx];
+      hc_device_param_t *device_param = &devices_param[*backend_devices_idx];
 
       device_param->device_id = device_id;
 
-      backend_ctx->backend_device_from_hip[hip_devices_idx] = backend_devices_idx;
+      backend_ctx->backend_device_from_hip[hip_devices_idx] = *backend_devices_idx;
 
       hipDevice_t hip_device;
 
@@ -6275,10 +6262,10 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       if ((device_param->opencl_platform_vendor_id == VENDOR_ID_AMD_USE_HIP) && (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP))
       {
-         need_adl = true;
+         *need_adl = true;
 
          #if defined (__linux__)
-         need_sysfs_amdgpu = true;
+         *need_sysfs_amdgpu = true;
          #endif
       }
 
@@ -6404,7 +6391,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       if (device_param->skipped == false)
       {
-        device_param->bridge_link_device = bridge_link_device++;
+        device_param->bridge_link_device = (*bridge_link_device)++;
 
         hip_devices_active++;
       }
@@ -6414,12 +6401,19 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
   backend_ctx->hip_devices_cnt     = hip_devices_cnt;
   backend_ctx->hip_devices_active  = hip_devices_active;
 
-  // Metal
+  return 0;
+}
+
+int backend_ctx_devices_init_metal (hashcat_ctx_t *hashcat_ctx, MAYBE_UNUSED bool is_virtualized, MAYBE_UNUSED int virtmulti, MAYBE_UNUSED int *virthost, MAYBE_UNUSED int *virthost_finder, MAYBE_UNUSED int *backend_devices_idx, MAYBE_UNUSED int *bridge_link_device, MAYBE_UNUSED bool *need_iokit)
+{
+  backend_ctx_t     *backend_ctx   = hashcat_ctx->backend_ctx;
 
   int metal_devices_cnt    = 0;
   int metal_devices_active = 0;
 
   #if defined (__APPLE__)
+  hc_device_param_t *devices_param = backend_ctx->devices_param;
+
   if (backend_ctx->mtl)
   {
     // device count
@@ -6431,15 +6425,15 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
     if (is_virtualized == true)
     {
-      if ((virthost == -1) && (virthost_finder <= metal_devices_cnt))
+      if ((*virthost == -1) && (*virthost_finder <= metal_devices_cnt))
       {
         metal_devices_cnt = virtmulti;
 
-        virthost = virthost_finder - 1;
+        *virthost = *virthost_finder - 1;
       }
       else
       {
-        virthost_finder -= metal_devices_cnt;
+        *virthost_finder -= metal_devices_cnt;
 
         metal_devices_cnt = 0;
       }
@@ -6449,17 +6443,17 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
     // device specific
 
-    for (int metal_devices_idx = 0; metal_devices_idx < metal_devices_cnt; metal_devices_idx++, backend_devices_idx++)
+    for (int metal_devices_idx = 0; metal_devices_idx < metal_devices_cnt; metal_devices_idx++, (*backend_devices_idx)++)
     {
-      const u32 device_id = backend_devices_idx;
+      const u32 device_id = *backend_devices_idx;
 
-      const u32 metal_devices_idx_real = (is_virtualized == true) ? virthost : metal_devices_idx;
+      const u32 metal_devices_idx_real = (is_virtualized == true) ? *virthost : metal_devices_idx;
 
-      hc_device_param_t *device_param = &devices_param[backend_devices_idx];
+      hc_device_param_t *device_param = &devices_param[*backend_devices_idx];
 
       device_param->device_id = device_id;
 
-      backend_ctx->backend_device_from_metal[metal_devices_idx] = backend_devices_idx;
+      backend_ctx->backend_device_from_metal[metal_devices_idx] = *backend_devices_idx;
 
       mtl_device_id metal_device = NULL;
 
@@ -6789,7 +6783,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_vendor_id == VENDOR_ID_APPLE))
       {
-        need_iokit = true;
+        *need_iokit = true;
       }
 
       // CPU burning loop damper
@@ -6831,7 +6825,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       if (device_param->skipped == false)
       {
-        device_param->bridge_link_device = bridge_link_device++;
+        device_param->bridge_link_device = (*bridge_link_device)++;
 
         metal_devices_active++;
       }
@@ -6842,7 +6836,16 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
   backend_ctx->metal_devices_cnt     = metal_devices_cnt;
   backend_ctx->metal_devices_active  = metal_devices_active;
 
-  // OCL
+  return 0;
+}
+
+int backend_ctx_devices_init_opencl (hashcat_ctx_t *hashcat_ctx, bool is_virtualized, int virtmulti, int *virthost, int *virthost_finder, int *backend_devices_idx, int *bridge_link_device, bool *need_nvml, MAYBE_UNUSED bool *need_nvapi, bool *need_adl, MAYBE_UNUSED bool *need_sysfs_amdgpu, MAYBE_UNUSED bool *need_sysfs_cpu, MAYBE_UNUSED bool *need_iokit)
+{
+  const folder_config_t *folder_config = hashcat_ctx->folder_config;
+  backend_ctx_t         *backend_ctx   = hashcat_ctx->backend_ctx;
+  user_options_t        *user_options  = hashcat_ctx->user_options;
+
+  hc_device_param_t     *devices_param = backend_ctx->devices_param;
 
   int opencl_devices_cnt    = 0;
   int opencl_devices_active = 0;
@@ -6868,15 +6871,15 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       if (is_virtualized == true)
       {
-        if ((virthost == -1) && (virthost_finder <= (int) opencl_platform_devices_cnt))
+        if ((*virthost == -1) && (*virthost_finder <= (int) opencl_platform_devices_cnt))
         {
           opencl_platform_devices_cnt = virtmulti;
 
-          virthost = virthost_finder - 1;
+          *virthost = *virthost_finder - 1;
         }
         else
         {
-          virthost_finder -= (int) opencl_platform_devices_cnt;
+          *virthost_finder -= (int) opencl_platform_devices_cnt;
 
           opencl_platform_devices_cnt = 0;
         }
@@ -6884,21 +6887,21 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
         opencl_platforms_devices_cnt[opencl_platforms_idx] = opencl_platform_devices_cnt;
       }
 
-      for (u32 opencl_platform_devices_idx = 0; opencl_platform_devices_idx < opencl_platform_devices_cnt; opencl_platform_devices_idx++, backend_devices_idx++, opencl_devices_cnt++)
+      for (u32 opencl_platform_devices_idx = 0; opencl_platform_devices_idx < opencl_platform_devices_cnt; opencl_platform_devices_idx++, (*backend_devices_idx)++, opencl_devices_cnt++)
       {
-        const u32 device_id = backend_devices_idx;
+        const u32 device_id = *backend_devices_idx;
 
         hc_device_param_t *device_param = &devices_param[device_id];
 
         device_param->device_id = device_id;
 
-        backend_ctx->backend_device_from_opencl[opencl_devices_cnt] = backend_devices_idx;
+        backend_ctx->backend_device_from_opencl[opencl_devices_cnt] = *backend_devices_idx;
 
-        backend_ctx->backend_device_from_opencl_platform[opencl_platforms_idx][opencl_platform_devices_idx] = backend_devices_idx;
+        backend_ctx->backend_device_from_opencl_platform[opencl_platforms_idx][opencl_platform_devices_idx] = *backend_devices_idx;
 
         device_param->opencl_platform_vendor_id = opencl_platform_vendor_id;
 
-        device_param->opencl_device = opencl_platform_devices[(is_virtualized == true) ? virthost : (int) opencl_platform_devices_idx];
+        device_param->opencl_device = opencl_platform_devices[(is_virtualized == true) ? *virthost : (int) opencl_platform_devices_idx];
 
         //device_param->opencl_platform = opencl_platform;
 
@@ -7688,12 +7691,12 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
           #if defined (__APPLE__)
           if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE)
           {
-            need_iokit = true;
+            *need_iokit = true;
           }
           #endif
 
           #if defined (__linux__)
-          need_sysfs_cpu = true;
+          *need_sysfs_cpu = true;
           #endif
         }
 
@@ -7701,19 +7704,19 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
         {
           if ((device_param->opencl_platform_vendor_id == VENDOR_ID_AMD) && (device_param->opencl_device_vendor_id == VENDOR_ID_AMD))
           {
-            need_adl = true;
+            *need_adl = true;
 
             #if defined (__linux__)
-            need_sysfs_amdgpu = true;
+            *need_sysfs_amdgpu = true;
             #endif
           }
 
           if ((device_param->opencl_platform_vendor_id == VENDOR_ID_NV) && (device_param->opencl_device_vendor_id == VENDOR_ID_NV))
           {
-            need_nvml = true;
+            *need_nvml = true;
 
             #if defined (_WIN) || defined (__CYGWIN__)
-            need_nvapi = true;
+            *need_nvapi = true;
             #endif
           }
 
@@ -7722,7 +7725,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
           {
             if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE)
             {
-              need_iokit = true;
+              *need_iokit = true;
             }
           }
           #endif
@@ -8231,7 +8234,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
            * activate device
            */
 
-          device_param->bridge_link_device = bridge_link_device++;
+          device_param->bridge_link_device = (*bridge_link_device)++;
 
           opencl_devices_active++;
         }
@@ -8242,10 +8245,57 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
   backend_ctx->opencl_devices_cnt     = opencl_devices_cnt;
   backend_ctx->opencl_devices_active  = opencl_devices_active;
 
+  return 0;
+}
+
+int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
+{
+  const bridge_ctx_t    *bridge_ctx    = hashcat_ctx->bridge_ctx;
+        backend_ctx_t   *backend_ctx   = hashcat_ctx->backend_ctx;
+        user_options_t  *user_options  = hashcat_ctx->user_options;
+
+  if (backend_ctx->enabled == false) return 0;
+
+  hc_device_param_t *devices_param = backend_ctx->devices_param;
+
+  bool need_adl           = false;
+  bool need_nvml          = false;
+  bool need_nvapi         = false;
+  bool need_sysfs_amdgpu  = false;
+  bool need_sysfs_cpu     = false;
+  bool need_iokit         = false;
+
+  int bridge_link_device = 0; // this will only count active device
+
+  int backend_devices_idx = 0; // this will not only count active devices
+
+  bool is_virtualized = ((user_options->backend_devices_virtmulti > 1) || (bridge_ctx->enabled == true)) ? true : false;
+
+  int virtmulti = (bridge_ctx->enabled == true) ? bridge_ctx->get_unit_count (bridge_ctx->platform_context) : (int) user_options->backend_devices_virtmulti;
+
+  int virthost = -1;
+  int virthost_finder = user_options->backend_devices_virthost;
+
+  // CUDA
+
+  backend_ctx_devices_init_cuda (hashcat_ctx, is_virtualized, virtmulti, &virthost, &virthost_finder, &backend_devices_idx, &bridge_link_device, &need_nvml, &need_nvapi);
+
+  // HIP
+
+  backend_ctx_devices_init_hip (hashcat_ctx, is_virtualized, virtmulti, &virthost, &virthost_finder, &backend_devices_idx, &bridge_link_device, &need_adl, &need_sysfs_amdgpu);
+
+  // Metal
+
+  backend_ctx_devices_init_metal (hashcat_ctx, is_virtualized, virtmulti, &virthost, &virthost_finder, &backend_devices_idx, &bridge_link_device, &need_iokit);
+
+  // OCL
+
+  backend_ctx_devices_init_opencl (hashcat_ctx, is_virtualized, virtmulti, &virthost, &virthost_finder, &backend_devices_idx, &bridge_link_device, &need_nvml, &need_nvapi, &need_adl, &need_sysfs_amdgpu, &need_sysfs_cpu, &need_iokit);
+
   // all devices combined go into backend_* variables
 
-  backend_ctx->backend_devices_cnt    = cuda_devices_cnt    + hip_devices_cnt    + metal_devices_cnt    + opencl_devices_cnt;
-  backend_ctx->backend_devices_active = cuda_devices_active + hip_devices_active + metal_devices_active + opencl_devices_active;
+  backend_ctx->backend_devices_cnt    = backend_ctx->cuda_devices_cnt    + backend_ctx->hip_devices_cnt    + backend_ctx->metal_devices_cnt    + backend_ctx->opencl_devices_cnt;
+  backend_ctx->backend_devices_active = backend_ctx->cuda_devices_active + backend_ctx->hip_devices_active + backend_ctx->metal_devices_active + backend_ctx->opencl_devices_active;
 
   #if defined (__APPLE__)
   // disable Metal devices if at least one OpenCL device is enabled

From 0c2ed0d1991e97c1ae76006adfee656e637a93f2 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Sun, 29 Jun 2025 14:39:14 +0200
Subject: [PATCH 44/83] Update plugins that benefit from an artificially
 limited register count (NVIDIA). Update default hash settings to 64MiB:3:4
 for Argon2 in -m 70000, following RFC 9106 recommendations. Add option
 OPTS_TYPE_THREAD_MULTI_DISABLE: allows plugin developers to disable scaling
 the password candidate batch size based on device thread count. This can be
 useful for super slow hash algorithms that utilize threads differently, e.g.,
 when the algorithm allows parallelization. Note: thread count for the device
 can still be set normally. Add options
 OPTI_TYPE_SLOW_HASH_DIMY_INIT/LOOP/COMP: enable 2D launches for slow hash
 init/loop/comp kernel with dimensions X and Y. The Y value must be set via
 salt->salt_dimy attribute. Change autotune kernel-loops start value to the
 lowest multiple of the target hash iteration count, if kernel_loops_min
 permits. Fixed a bug in autotune where kernel_threads_max was not respected
 during initial init and loop-prepare kernel runs.

---
 OpenCL/inc_types.h           |   1 +
 include/types.h              |  18 ++++---
 src/autotune.c               |  22 ++++----
 src/backend.c                | 101 +++++++++++++++++++++++++++++------
 src/modules/module_01800.c   |   1 +
 src/modules/module_06400.c   |   1 +
 src/modules/module_06800.c   |   1 +
 src/modules/module_07700.c   |   1 +
 src/modules/module_07701.c   |   1 +
 src/modules/module_08300.c   |   3 +-
 src/modules/module_13751.c   |   3 +-
 src/modules/module_13752.c   |   3 +-
 src/modules/module_13753.c   |   3 +-
 src/modules/module_13761.c   |   3 +-
 src/modules/module_13762.c   |   3 +-
 src/modules/module_13763.c   |   3 +-
 src/modules/module_14800.c   |   1 +
 src/modules/module_14900.c   |   3 +-
 src/modules/module_15600.c   |   1 +
 src/modules/module_16300.c   |   1 +
 src/modules/module_16700.c   |   1 +
 src/modules/module_16900.c   |   1 +
 src/modules/module_18100.c   |   1 +
 src/modules/module_20800.c   |   1 +
 src/modules/module_21300.c   |   1 +
 src/modules/module_21500.c   |   1 +
 src/modules/module_21700.c   |   1 +
 src/modules/module_22100.c   |   3 +-
 src/modules/module_22300.c   |   1 +
 src/modules/module_22921.c   |   3 +-
 src/modules/module_22941.c   |   3 +-
 src/modules/module_23400.c   |   1 +
 src/modules/module_23600.c   |   1 +
 src/modules/module_23800.c   |   3 +-
 src/modules/module_24200.c   |   1 +
 src/modules/module_24420.c   |   1 +
 src/modules/module_25500.c   |   1 +
 src/modules/module_25900.c   |   3 +-
 src/modules/module_26000.c   |   1 +
 src/modules/module_26100.c   |   1 +
 src/modules/module_26600.c   |   1 +
 src/modules/module_26700.c   |   3 +-
 src/modules/module_26800.c   |   3 +-
 src/modules/module_26900.c   |   1 +
 src/modules/module_27300.c   |   1 +
 src/modules/module_27400.c   |   1 +
 src/modules/module_27500.c   |   1 +
 src/modules/module_27600.c   |   1 +
 src/modules/module_29451.c   |   3 +-
 src/modules/module_29452.c   |   3 +-
 src/modules/module_29453.c   |   3 +-
 src/modules/module_29461.c   |   3 +-
 src/modules/module_29462.c   |   3 +-
 src/modules/module_29463.c   |   3 +-
 src/modules/module_70000.c   |   2 +-
 src/shared.c                 |   6 +++
 tools/test_modules/m70000.pm |   2 +-
 57 files changed, 190 insertions(+), 54 deletions(-)

diff --git a/OpenCL/inc_types.h b/OpenCL/inc_types.h
index 233e28958..a13c89b8e 100644
--- a/OpenCL/inc_types.h
+++ b/OpenCL/inc_types.h
@@ -2008,6 +2008,7 @@ typedef struct salt
   u32 salt_len_pc;
   u32 salt_iter;
   u32 salt_iter2;
+  u32 salt_dimy;
   u32 salt_sign[2];
   u32 salt_repeats;
 
diff --git a/include/types.h b/include/types.h
index 22c57d85a..600e7bd93 100644
--- a/include/types.h
+++ b/include/types.h
@@ -412,6 +412,9 @@ typedef enum opti_type
   OPTI_TYPE_REGISTER_LIMIT        = (1 << 20), // We'll limit the register count to 128
   OPTI_TYPE_SLOW_HASH_SIMD_INIT2  = (1 << 21),
   OPTI_TYPE_SLOW_HASH_SIMD_LOOP2  = (1 << 22),
+  OPTI_TYPE_SLOW_HASH_DIMY_INIT   = (1 << 23),
+  OPTI_TYPE_SLOW_HASH_DIMY_LOOP   = (1 << 24),
+  OPTI_TYPE_SLOW_HASH_DIMY_COMP   = (1 << 25),
 
 } opti_type_t;
 
@@ -476,14 +479,17 @@ typedef enum opts_type
   OPTS_TYPE_DYNAMIC_SHARED    = (1ULL << 53), // use dynamic shared memory (note: needs special kernel changes)
   OPTS_TYPE_SELF_TEST_DISABLE = (1ULL << 54), // some algos use JiT in combinations with a salt or create too much startup time
   OPTS_TYPE_MP_MULTI_DISABLE  = (1ULL << 55), // do not multiply the kernel-accel with the multiprocessor count per device to allow more fine-tuned workload settings
-  OPTS_TYPE_NATIVE_THREADS    = (1ULL << 56), // forces "native" thread count: CPU=1, GPU-Intel=8, GPU-AMD=64 (wavefront), GPU-NV=32 (warps)
-  OPTS_TYPE_MAXIMUM_THREADS   = (1ULL << 57), // disable else branch in pre-compilation thread count optimization setting
-  OPTS_TYPE_POST_AMP_UTF16LE  = (1ULL << 58), // run the utf8 to utf16le conversion kernel after they have been processed from amplifiers
+  OPTS_TYPE_THREAD_MULTI_DISABLE              // do not multiply the kernel-power with the thread count per device for super slow algos
+                              = (1ULL << 56),
+  OPTS_TYPE_NATIVE_THREADS    = (1ULL << 57), // forces "native" thread count: CPU=1, GPU-Intel=8, GPU-AMD=64 (wavefront), GPU-NV=32 (warps)
+  OPTS_TYPE_MAXIMUM_THREADS   = (1ULL << 58), // disable else branch in pre-compilation thread count optimization setting
+  OPTS_TYPE_POST_AMP_UTF16LE  = (1ULL << 59), // run the utf8 to utf16le conversion kernel after they have been processed from amplifiers
   OPTS_TYPE_AUTODETECT_DISABLE
-                              = (1ULL << 59), // skip autodetect engine
-  OPTS_TYPE_STOCK_MODULE      = (1ULL << 60), // module included with hashcat default distribution
+                              = (1ULL << 60), // skip autodetect engine
+  OPTS_TYPE_STOCK_MODULE      = (1ULL << 61), // module included with hashcat default distribution
   OPTS_TYPE_MULTIHASH_DESPITE_ESALT
-                              = (1ULL << 61), // overrule multihash cracking check same salt but not same esalt
+                              = (1ULL << 62), // overrule multihash cracking check same salt but not same esalt
+  OPTS_TYPE_MAXIMUM_ACCEL     = (1ULL << 63)  // try to maximize kernel-accel during autotune
 
 } opts_type_t;
 
diff --git a/src/autotune.c b/src/autotune.c
index 87637b29d..065c0a217 100644
--- a/src/autotune.c
+++ b/src/autotune.c
@@ -43,7 +43,8 @@ static double try_run (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_par
   device_param->kernel_param.loop_cnt = kernel_loops; // not a bug, both need to be set
   device_param->kernel_param.il_cnt   = kernel_loops; // because there's two variables for inner iters for slow and fast hashes
 
-  const u32 hardware_power = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * kernel_threads;
+  const u32 hardware_power = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE)     ? 1 : device_param->device_processors)
+                           * ((hashconfig->opts_type & OPTS_TYPE_THREAD_MULTI_DISABLE) ? 1 : kernel_threads);
 
   u32 kernel_power_try = hardware_power * kernel_accel;
 
@@ -133,7 +134,8 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
     device_param->kernel_accel   = kernel_accel_min;
     device_param->kernel_loops   = kernel_loops_min;
     device_param->kernel_threads = kernel_threads_min;
-    device_param->hardware_power = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * kernel_threads_min;
+    device_param->hardware_power = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE)     ? 1 : device_param->device_processors)
+                                 * ((hashconfig->opts_type & OPTS_TYPE_THREAD_MULTI_DISABLE) ? 1 : kernel_threads_min);
     device_param->kernel_power   = device_param->hardware_power * kernel_accel_min;
   }
 
@@ -212,7 +214,8 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
     // from here it's clear we are allowed to autotune
     // so let's init some fake words
 
-    const u32 hardware_power_max = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * kernel_threads_max;
+    const u32 hardware_power_max = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE)     ? 1 : device_param->device_processors)
+                                 * ((hashconfig->opts_type & OPTS_TYPE_THREAD_MULTI_DISABLE) ? 1 : kernel_threads_max);
 
     u32 kernel_power_max = hardware_power_max * kernel_accel_max;
 
@@ -298,13 +301,13 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
     {
       const u32 kernel_threads_sav = device_param->kernel_threads;
 
-      device_param->kernel_threads = device_param->kernel_wgs1;
+      device_param->kernel_threads = MIN (device_param->kernel_wgs1, kernel_threads_max);
 
       run_kernel (hashcat_ctx, device_param, KERN_RUN_1, 0, kernel_power_max, false, 0, true);
 
       if (hashconfig->opts_type & OPTS_TYPE_LOOP_PREPARE)
       {
-        device_param->kernel_threads = device_param->kernel_wgs2p;
+        device_param->kernel_threads = MIN (device_param->kernel_wgs2p, kernel_threads_max);
 
         run_kernel (hashcat_ctx, device_param, KERN_RUN_2P, 0, kernel_power_max, false, 0, true);
       }
@@ -330,8 +333,6 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
     // v7 autotuner is a lot more straight forward
 
-    u32 kernel_loops_min_start = kernel_loops_min;
-
     if (hashes && hashes->st_salts_buf)
     {
       u32 start = kernel_loops_max;
@@ -348,12 +349,12 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
         if ((start >= kernel_loops_min) && (start <= kernel_loops_max))
         {
-          kernel_loops_min_start = start;
+          kernel_loops = start;
         }
       }
     }
 
-    for (u32 kernel_loops_test = kernel_loops_min_start; kernel_loops_test <= kernel_loops_max; kernel_loops_test <<= 1)
+    for (u32 kernel_loops_test = kernel_loops; kernel_loops_test <= kernel_loops_max; kernel_loops_test <<= 1)
     {
       double exec_msec = try_run_times (hashcat_ctx, device_param, kernel_accel_min, kernel_loops_test, kernel_threads_min, 2);
 
@@ -564,7 +565,8 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
   device_param->kernel_loops   = kernel_loops;
   device_param->kernel_threads = kernel_threads;
 
-  const u32 hardware_power = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * device_param->kernel_threads;
+  const u32 hardware_power = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE)     ? 1 : device_param->device_processors)
+                           * ((hashconfig->opts_type & OPTS_TYPE_THREAD_MULTI_DISABLE) ? 1 : device_param->kernel_threads);
 
   device_param->hardware_power = hardware_power;
 
diff --git a/src/backend.c b/src/backend.c
index fc0051dd4..00cac2245 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -2598,7 +2598,10 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
 
     if (kernel_threads == 0) kernel_threads = 1;
 
-    num_elements = CEILDIV (num_elements, kernel_threads);
+    if ((hashconfig->opts_type & OPTS_TYPE_THREAD_MULTI_DISABLE) == 0)
+    {
+      num_elements = CEILDIV (num_elements, kernel_threads);
+    }
 
     if (kern_run == KERN_RUN_1)
     {
@@ -2636,14 +2639,29 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
       }
     }
 
+    u32 gridDimX = num_elements;
+    u32 gridDimY = 1;
+    u32 gridDimZ = 1;
+
+    u32 blockDimX = kernel_threads;
+    u32 blockDimY = 1;
+    u32 blockDimZ = 1;
+
+    if ((hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_DIMY_INIT) && (kern_run == KERN_RUN_1))
+      blockDimY = hashcat_ctx->hashes->salts_buf->salt_dimy;
+    if ((hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_DIMY_LOOP) && (kern_run == KERN_RUN_2))
+      blockDimY = hashcat_ctx->hashes->salts_buf->salt_dimy;
+    if ((hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_DIMY_COMP) && (kern_run == KERN_RUN_3))
+      blockDimY = hashcat_ctx->hashes->salts_buf->salt_dimy;
+
     if (is_autotune == true)
     {
-      if (hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 1, 1, kernel_threads, 1, 1, dynamic_shared_mem, device_param->cuda_stream, device_param->kernel_params, NULL) == -1) return -1;
+      if (hc_cuLaunchKernel (hashcat_ctx, cuda_function, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, dynamic_shared_mem, device_param->cuda_stream, device_param->kernel_params, NULL) == -1) return -1;
     }
 
     if (hc_cuEventRecord (hashcat_ctx, device_param->cuda_event1, device_param->cuda_stream) == -1) return -1;
 
-    if (hc_cuLaunchKernel (hashcat_ctx, cuda_function, num_elements, 1, 1, kernel_threads, 1, 1, dynamic_shared_mem, device_param->cuda_stream, device_param->kernel_params, NULL) == -1) return -1;
+    if (hc_cuLaunchKernel (hashcat_ctx, cuda_function, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, dynamic_shared_mem, device_param->cuda_stream, device_param->kernel_params, NULL) == -1) return -1;
 
     if (hc_cuEventRecord (hashcat_ctx, device_param->cuda_event2, device_param->cuda_stream) == -1) return -1;
 
@@ -2699,7 +2717,10 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
 
     if (kernel_threads == 0) kernel_threads = 1;
 
-    num_elements = CEILDIV (num_elements, kernel_threads);
+    if ((hashconfig->opts_type & OPTS_TYPE_THREAD_MULTI_DISABLE) == 0)
+    {
+      num_elements = CEILDIV (num_elements, kernel_threads);
+    }
 
     if (kern_run == KERN_RUN_1)
     {
@@ -2737,14 +2758,31 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
       }
     }
 
+    u32 gridDimX = num_elements;
+    u32 gridDimY = 1;
+    u32 gridDimZ = 1;
+
+    u32 blockDimX = kernel_threads;
+    u32 blockDimY = 1;
+    u32 blockDimZ = 1;
+
+    if ((hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_DIMY_INIT) && (kern_run == KERN_RUN_1))
+      blockDimY = hashcat_ctx->hashes->salts_buf->salt_dimy;
+    if ((hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_DIMY_LOOP) && (kern_run == KERN_RUN_2))
+      blockDimY = hashcat_ctx->hashes->salts_buf->salt_dimy;
+    if ((hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_DIMY_COMP) && (kern_run == KERN_RUN_3))
+      blockDimY = hashcat_ctx->hashes->salts_buf->salt_dimy;
+
+    //printf ("%d %d %d %d %d %d %d\n", kern_run, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ);
+
     if (is_autotune == true)
     {
-      if (hc_hipLaunchKernel (hashcat_ctx, hip_function, num_elements, 1, 1, kernel_threads, 1, 1, dynamic_shared_mem, device_param->hip_stream, device_param->kernel_params, NULL) == -1) return -1;
+      if (hc_hipLaunchKernel (hashcat_ctx, hip_function, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, dynamic_shared_mem, device_param->hip_stream, device_param->kernel_params, NULL) == -1) return -1;
     }
 
     if (hc_hipEventRecord (hashcat_ctx, device_param->hip_event1, device_param->hip_stream) == -1) return -1;
 
-    if (hc_hipLaunchKernel (hashcat_ctx, hip_function, num_elements, 1, 1, kernel_threads, 1, 1, dynamic_shared_mem, device_param->hip_stream, device_param->kernel_params, NULL) == -1) return -1;
+    if (hc_hipLaunchKernel (hashcat_ctx, hip_function, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, dynamic_shared_mem, device_param->hip_stream, device_param->kernel_params, NULL) == -1) return -1;
 
     if (hc_hipEventRecord (hashcat_ctx, device_param->hip_event2, device_param->hip_stream) == -1) return -1;
 
@@ -2984,17 +3022,44 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
       }
     }
 
-    num_elements = round_up_multiple_64 (num_elements, kernel_threads);
+    if ((hashconfig->opts_type & OPTS_TYPE_THREAD_MULTI_DISABLE) == 0)
+    {
+      num_elements = round_up_multiple_64 (num_elements, kernel_threads);
+    }
+    else
+    {
+      num_elements = num_elements * kernel_threads;
+    }
 
-    const size_t global_work_size[3] = { num_elements,   1, 1 };
-    const size_t local_work_size[3]  = { kernel_threads, 1, 1 };
+    size_t global_work_size[3] = { num_elements,   1, 1 };
+    size_t local_work_size[3]  = { kernel_threads, 1, 1 };
+
+    cl_uint work_dim = 1;
+
+    if ((hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_DIMY_INIT) && (kern_run == KERN_RUN_1))
+    {
+      global_work_size[1] = local_work_size[1] = hashcat_ctx->hashes->salts_buf->salt_dimy;
+      work_dim = 2;
+    }
+
+    if ((hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_DIMY_LOOP) && (kern_run == KERN_RUN_2))
+    {
+      global_work_size[1] = local_work_size[1] = hashcat_ctx->hashes->salts_buf->salt_dimy;
+      work_dim = 2;
+    }
+
+    if ((hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_DIMY_COMP) && (kern_run == KERN_RUN_3))
+    {
+      global_work_size[1] = local_work_size[1] = hashcat_ctx->hashes->salts_buf->salt_dimy;
+      work_dim = 2;
+    }
 
     if (is_autotune == true)
     {
-      if (hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, opencl_kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, &opencl_event) == -1) return -1;
+      if (hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, opencl_kernel, work_dim, NULL, global_work_size, local_work_size, 0, NULL, &opencl_event) == -1) return -1;
     }
 
-    if (hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, opencl_kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, &opencl_event) == -1) return -1;
+    if (hc_clEnqueueNDRangeKernel (hashcat_ctx, device_param->opencl_command_queue, opencl_kernel, work_dim, NULL, global_work_size, local_work_size, 0, NULL, &opencl_event) == -1) return -1;
 
     // spin damper section
 
@@ -8952,7 +9017,8 @@ void backend_ctx_devices_sync_tuning (hashcat_ctx_t *hashcat_ctx)
       device_param_dst->kernel_loops   = device_param_src->kernel_loops;
       device_param_dst->kernel_threads = device_param_src->kernel_threads;
 
-      const u32 hardware_power = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param_dst->device_processors) * device_param_dst->kernel_threads;
+      const u32 hardware_power = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE)     ? 1 : device_param_dst->device_processors)
+                               * ((hashconfig->opts_type & OPTS_TYPE_THREAD_MULTI_DISABLE) ? 1 : device_param_dst->kernel_threads);
 
       device_param_dst->hardware_power = hardware_power;
 
@@ -9522,7 +9588,11 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p
 
       hc_asprintf (&hiprtc_options[hiprtc_options_idx++], "-D MAX_THREADS_PER_BLOCK=%d", (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_threads_max);
       hc_asprintf (&hiprtc_options[hiprtc_options_idx++], "--gpu-architecture=%s", device_param->gcnArchName);
-      hc_asprintf (&hiprtc_options[hiprtc_options_idx++], "--gpu-max-threads-per-block=%d", (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_threads_max);
+
+      if ((hashconfig->opts_type & OPTS_TYPE_THREAD_MULTI_DISABLE) == 0)
+      {
+        hc_asprintf (&hiprtc_options[hiprtc_options_idx++], "--gpu-max-threads-per-block=%d", (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_threads_max);
+      }
 
       // untested but it should work
       #if defined (_WIN) || defined (__CYGWIN__) || defined (__MSYS__)
@@ -10436,7 +10506,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
      * device properties
      */
 
-    const u32 device_processors = device_param->device_processors;
+    //const u32 device_processors = device_param->device_processors;
 
     /**
      * device threads
@@ -16059,7 +16129,8 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
     //    device_param->kernel_threads = kernel_threads;
     device_param->kernel_threads = 0;
 
-    u32 hardware_power_max = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_processors) * device_param->kernel_threads_max;
+    const u32 hardware_power_max = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE)     ? 1 : device_param->device_processors)
+                                 * ((hashconfig->opts_type & OPTS_TYPE_THREAD_MULTI_DISABLE) ? 1 : device_param->kernel_threads_max);
 
     u32 kernel_accel_min = device_param->kernel_accel_min;
     u32 kernel_accel_max = device_param->kernel_accel_max;
diff --git a/src/modules/module_01800.c b/src/modules/module_01800.c
index fea4851c6..f0d2e9623 100644
--- a/src/modules/module_01800.c
+++ b/src/modules/module_01800.c
@@ -20,6 +20,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_OS;
 static const char *HASH_NAME      = "sha512crypt $6$, SHA512 (Unix)";
 static const u64   KERN_TYPE      = 1800;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_USES_BITS_64;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE;
diff --git a/src/modules/module_06400.c b/src/modules/module_06400.c
index b7ac9d87f..a553846c7 100644
--- a/src/modules/module_06400.c
+++ b/src/modules/module_06400.c
@@ -20,6 +20,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_OS;
 static const char *HASH_NAME      = "AIX {ssha256}";
 static const u64   KERN_TYPE      = 6400;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE;
diff --git a/src/modules/module_06800.c b/src/modules/module_06800.c
index 215f26992..e6cf96fd6 100644
--- a/src/modules/module_06800.c
+++ b/src/modules/module_06800.c
@@ -20,6 +20,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_PASSWORD_MANAGER;
 static const char *HASH_NAME      = "LastPass + LastPass sniffed";
 static const u64   KERN_TYPE      = 6800;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE;
diff --git a/src/modules/module_07700.c b/src/modules/module_07700.c
index 2642a0bb7..1d783d351 100644
--- a/src/modules/module_07700.c
+++ b/src/modules/module_07700.c
@@ -21,6 +21,7 @@ static const char *HASH_NAME      = "SAP CODVN B (BCODE)";
 static const u64   KERN_TYPE      = 7700;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
                                   | OPTI_TYPE_PRECOMPUTE_INIT
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_NOT_ITERATED;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE
diff --git a/src/modules/module_07701.c b/src/modules/module_07701.c
index bc852639b..232d7ca4d 100644
--- a/src/modules/module_07701.c
+++ b/src/modules/module_07701.c
@@ -21,6 +21,7 @@ static const char *HASH_NAME      = "SAP CODVN B (BCODE) from RFC_READ_TABLE";
 static const u64   KERN_TYPE      = 7701;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
                                   | OPTI_TYPE_PRECOMPUTE_INIT
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_NOT_ITERATED;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE
diff --git a/src/modules/module_08300.c b/src/modules/module_08300.c
index c499667ae..80dd3475d 100644
--- a/src/modules/module_08300.c
+++ b/src/modules/module_08300.c
@@ -19,7 +19,8 @@ static const u32   DGST_SIZE      = DGST_SIZE_4_5;
 static const u32   HASH_CATEGORY  = HASH_CATEGORY_NETWORK_SERVER;
 static const char *HASH_NAME      = "DNSSEC (NSEC3)";
 static const u64   KERN_TYPE      = 8300;
-static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE;
+static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_BE
                                   | OPTS_TYPE_ST_HEX
diff --git a/src/modules/module_13751.c b/src/modules/module_13751.c
index dac31a633..ee27acfd6 100644
--- a/src/modules/module_13751.c
+++ b/src/modules/module_13751.c
@@ -23,7 +23,8 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_FDE;
 static const char *HASH_NAME      = "VeraCrypt SHA256 + XTS 512 bit (legacy)";
 static const u64   KERN_TYPE      = 13751;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
-                                  | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
+                                  | OPTI_TYPE_SLOW_HASH_SIMD_LOOP
+                                  | OPTI_TYPE_REGISTER_LIMIT;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE
                                   | OPTS_TYPE_BINARY_HASHFILE
diff --git a/src/modules/module_13752.c b/src/modules/module_13752.c
index e959a80af..095758dd2 100644
--- a/src/modules/module_13752.c
+++ b/src/modules/module_13752.c
@@ -23,7 +23,8 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_FDE;
 static const char *HASH_NAME      = "VeraCrypt SHA256 + XTS 1024 bit (legacy)";
 static const u64   KERN_TYPE      = 13752;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
-                                  | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
+                                  | OPTI_TYPE_SLOW_HASH_SIMD_LOOP
+                                  | OPTI_TYPE_REGISTER_LIMIT;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE
                                   | OPTS_TYPE_BINARY_HASHFILE
diff --git a/src/modules/module_13753.c b/src/modules/module_13753.c
index 277afaf02..4eee98625 100644
--- a/src/modules/module_13753.c
+++ b/src/modules/module_13753.c
@@ -23,7 +23,8 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_FDE;
 static const char *HASH_NAME      = "VeraCrypt SHA256 + XTS 1536 bit (legacy)";
 static const u64   KERN_TYPE      = 13753;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
-                                  | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
+                                  | OPTI_TYPE_SLOW_HASH_SIMD_LOOP
+                                  | OPTI_TYPE_REGISTER_LIMIT;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE
                                   | OPTS_TYPE_BINARY_HASHFILE
diff --git a/src/modules/module_13761.c b/src/modules/module_13761.c
index e33a693e2..5b1b82b27 100644
--- a/src/modules/module_13761.c
+++ b/src/modules/module_13761.c
@@ -23,7 +23,8 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_FDE;
 static const char *HASH_NAME      = "VeraCrypt SHA256 + XTS 512 bit + boot-mode (legacy)";
 static const u64   KERN_TYPE      = 13751;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
-                                  | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
+                                  | OPTI_TYPE_SLOW_HASH_SIMD_LOOP
+                                  | OPTI_TYPE_REGISTER_LIMIT;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE
                                   | OPTS_TYPE_BINARY_HASHFILE
diff --git a/src/modules/module_13762.c b/src/modules/module_13762.c
index 0f5f18545..6f1a27929 100644
--- a/src/modules/module_13762.c
+++ b/src/modules/module_13762.c
@@ -23,7 +23,8 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_FDE;
 static const char *HASH_NAME      = "VeraCrypt SHA256 + XTS 1024 bit + boot-mode (legacy)";
 static const u64   KERN_TYPE      = 13752;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
-                                  | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
+                                  | OPTI_TYPE_SLOW_HASH_SIMD_LOOP
+                                  | OPTI_TYPE_REGISTER_LIMIT;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE
                                   | OPTS_TYPE_BINARY_HASHFILE
diff --git a/src/modules/module_13763.c b/src/modules/module_13763.c
index c9e28d4c7..fb50002a2 100644
--- a/src/modules/module_13763.c
+++ b/src/modules/module_13763.c
@@ -23,7 +23,8 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_FDE;
 static const char *HASH_NAME      = "VeraCrypt SHA256 + XTS 1536 bit + boot-mode (legacy)";
 static const u64   KERN_TYPE      = 13753;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
-                                  | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
+                                  | OPTI_TYPE_SLOW_HASH_SIMD_LOOP
+                                  | OPTI_TYPE_REGISTER_LIMIT;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE
                                   | OPTS_TYPE_BINARY_HASHFILE
diff --git a/src/modules/module_14800.c b/src/modules/module_14800.c
index 9f3c1ca0f..1e4b91a15 100644
--- a/src/modules/module_14800.c
+++ b/src/modules/module_14800.c
@@ -21,6 +21,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_ARCHIVE;
 static const char *HASH_NAME      = "iTunes backup >= 10.0";
 static const u64   KERN_TYPE      = 14800;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_SLOW_HASH_SIMD_LOOP
                                   | OPTI_TYPE_SLOW_HASH_SIMD_LOOP2;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
diff --git a/src/modules/module_14900.c b/src/modules/module_14900.c
index cbe009024..fcc639688 100644
--- a/src/modules/module_14900.c
+++ b/src/modules/module_14900.c
@@ -19,7 +19,8 @@ static const u32   DGST_SIZE      = DGST_SIZE_4_4;
 static const u32   HASH_CATEGORY  = HASH_CATEGORY_RAW_CIPHER_KPA;
 static const char *HASH_NAME      = "Skip32 (PT = $salt, key = $pass)";
 static const u64   KERN_TYPE      = 14900;
-static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE;
+static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE
                                   | OPTS_TYPE_SUGGEST_KG;
diff --git a/src/modules/module_15600.c b/src/modules/module_15600.c
index 1f2b7b9a3..66469d9bf 100644
--- a/src/modules/module_15600.c
+++ b/src/modules/module_15600.c
@@ -21,6 +21,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_CRYPTOCURRENCY_WALLET;
 static const char *HASH_NAME      = "Ethereum Wallet, PBKDF2-HMAC-SHA256";
 static const u64   KERN_TYPE      = 15600;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE
diff --git a/src/modules/module_16300.c b/src/modules/module_16300.c
index e7507f1fa..6a12a1b39 100644
--- a/src/modules/module_16300.c
+++ b/src/modules/module_16300.c
@@ -20,6 +20,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_CRYPTOCURRENCY_WALLET;
 static const char *HASH_NAME      = "Ethereum Pre-Sale Wallet, PBKDF2-HMAC-SHA256";
 static const u64   KERN_TYPE      = 16300;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE
diff --git a/src/modules/module_16700.c b/src/modules/module_16700.c
index 9dce98e62..2fec2f557 100644
--- a/src/modules/module_16700.c
+++ b/src/modules/module_16700.c
@@ -20,6 +20,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_FDE;
 static const char *HASH_NAME      = "FileVault 2";
 static const u64   KERN_TYPE      = 16200;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE;
diff --git a/src/modules/module_16900.c b/src/modules/module_16900.c
index 443da6007..70eca3f16 100644
--- a/src/modules/module_16900.c
+++ b/src/modules/module_16900.c
@@ -20,6 +20,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_PASSWORD_MANAGER;
 static const char *HASH_NAME      = "Ansible Vault";
 static const u64   KERN_TYPE      = 16900;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE;
diff --git a/src/modules/module_18100.c b/src/modules/module_18100.c
index 46dadfe60..678cb8265 100644
--- a/src/modules/module_18100.c
+++ b/src/modules/module_18100.c
@@ -21,6 +21,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_OTP;
 static const char *HASH_NAME      = "TOTP (HMAC-SHA1)";
 static const u64   KERN_TYPE      = 18100;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_NOT_ITERATED;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_BE
diff --git a/src/modules/module_20800.c b/src/modules/module_20800.c
index 6cece615d..1330b99bc 100644
--- a/src/modules/module_20800.c
+++ b/src/modules/module_20800.c
@@ -20,6 +20,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_RAW_HASH_SALTED;
 static const char *HASH_NAME      = "sha256(md5($pass))";
 static const u64   KERN_TYPE      = 20800;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_PRECOMPUTE_INIT
                                   | OPTI_TYPE_EARLY_SKIP
                                   | OPTI_TYPE_NOT_ITERATED
diff --git a/src/modules/module_21300.c b/src/modules/module_21300.c
index 75e57c1f4..34ebff7f5 100644
--- a/src/modules/module_21300.c
+++ b/src/modules/module_21300.c
@@ -20,6 +20,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_RAW_HASH_SALTED;
 static const char *HASH_NAME      = "md5($salt.sha1($salt.$pass))";
 static const u64   KERN_TYPE      = 21300;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_PRECOMPUTE_INIT
                                   | OPTI_TYPE_EARLY_SKIP
                                   | OPTI_TYPE_NOT_ITERATED
diff --git a/src/modules/module_21500.c b/src/modules/module_21500.c
index dba580a7c..8936be11c 100644
--- a/src/modules/module_21500.c
+++ b/src/modules/module_21500.c
@@ -21,6 +21,7 @@ static const char *HASH_NAME      = "SolarWinds Orion";
 static const u64   KERN_TYPE      = 21500;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
                                   | OPTI_TYPE_USES_BITS_64
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE;
diff --git a/src/modules/module_21700.c b/src/modules/module_21700.c
index 65536027b..e00cf232f 100644
--- a/src/modules/module_21700.c
+++ b/src/modules/module_21700.c
@@ -22,6 +22,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_CRYPTOCURRENCY_WALLET;
 static const char *HASH_NAME      = "Electrum Wallet (Salt-Type 4)";
 static const u64   KERN_TYPE      = 21700;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_USES_BITS_64
                                   | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
diff --git a/src/modules/module_22100.c b/src/modules/module_22100.c
index 344218243..4d79d30bf 100644
--- a/src/modules/module_22100.c
+++ b/src/modules/module_22100.c
@@ -20,7 +20,8 @@ static const u32   DGST_SIZE      = DGST_SIZE_4_4;
 static const u32   HASH_CATEGORY  = HASH_CATEGORY_FDE;
 static const char *HASH_NAME      = "BitLocker";
 static const u64   KERN_TYPE      = 22100;
-static const u32   OPTI_TYPE      = OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
+static const u32   OPTI_TYPE      = OPTI_TYPE_SLOW_HASH_SIMD_LOOP
+                                  | OPTI_TYPE_REGISTER_LIMIT;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE
                                   | OPTS_TYPE_MP_MULTI_DISABLE;
diff --git a/src/modules/module_22300.c b/src/modules/module_22300.c
index a85d1d753..04a4bc08c 100644
--- a/src/modules/module_22300.c
+++ b/src/modules/module_22300.c
@@ -20,6 +20,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_RAW_HASH_SALTED;
 static const char *HASH_NAME      = "sha256($salt.$pass.$salt)";
 static const u64   KERN_TYPE      = 22300;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_PRECOMPUTE_INIT
                                   | OPTI_TYPE_EARLY_SKIP
                                   | OPTI_TYPE_NOT_ITERATED
diff --git a/src/modules/module_22921.c b/src/modules/module_22921.c
index 163aacee9..c6471a232 100644
--- a/src/modules/module_22921.c
+++ b/src/modules/module_22921.c
@@ -19,7 +19,8 @@ static const u32   DGST_SIZE      = DGST_SIZE_4_4;
 static const u32   HASH_CATEGORY  = HASH_CATEGORY_PRIVATE_KEY;
 static const char *HASH_NAME      = "RSA/DSA/EC/OpenSSH Private Keys ($6$)";
 static const u64   KERN_TYPE      = 22921;
-static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE;
+static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE;
 static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
diff --git a/src/modules/module_22941.c b/src/modules/module_22941.c
index 4b4bf09ac..65e7d7c2b 100644
--- a/src/modules/module_22941.c
+++ b/src/modules/module_22941.c
@@ -19,7 +19,8 @@ static const u32   DGST_SIZE      = DGST_SIZE_4_4;
 static const u32   HASH_CATEGORY  = HASH_CATEGORY_PRIVATE_KEY;
 static const char *HASH_NAME      = "RSA/DSA/EC/OpenSSH Private Keys ($4$)";
 static const u64   KERN_TYPE      = 22941;
-static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE;
+static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE;
 static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
diff --git a/src/modules/module_23400.c b/src/modules/module_23400.c
index 13743815f..c2cc2a2e2 100644
--- a/src/modules/module_23400.c
+++ b/src/modules/module_23400.c
@@ -20,6 +20,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_PASSWORD_MANAGER;
 static const char *HASH_NAME      = "Bitwarden";
 static const u64   KERN_TYPE      = 23400;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE
diff --git a/src/modules/module_23600.c b/src/modules/module_23600.c
index fdb3467d1..0b8994ff7 100644
--- a/src/modules/module_23600.c
+++ b/src/modules/module_23600.c
@@ -20,6 +20,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_ARCHIVE;
 static const char *HASH_NAME      = "AxCrypt 2 AES-256";
 static const u64   KERN_TYPE      = 23600;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_USES_BITS_64
                                   | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
diff --git a/src/modules/module_23800.c b/src/modules/module_23800.c
index c6c8fec61..c75a9aaaf 100644
--- a/src/modules/module_23800.c
+++ b/src/modules/module_23800.c
@@ -20,7 +20,8 @@ static const u32   DGST_SIZE      = DGST_SIZE_4_4; // actually only DGST_SIZE_4_
 static const u32   HASH_CATEGORY  = HASH_CATEGORY_ARCHIVE;
 static const char *HASH_NAME      = "RAR3-p (Compressed)";
 static const u64   KERN_TYPE      = 23800;
-static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE;
+static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE
                                   | OPTS_TYPE_HOOK23
diff --git a/src/modules/module_24200.c b/src/modules/module_24200.c
index 8b6a8e5c8..20ea6ccec 100644
--- a/src/modules/module_24200.c
+++ b/src/modules/module_24200.c
@@ -20,6 +20,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_DATABASE_SERVER;
 static const char *HASH_NAME      = "MongoDB ServerKey SCRAM-SHA-256";
 static const u64   KERN_TYPE      = 24200;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE
diff --git a/src/modules/module_24420.c b/src/modules/module_24420.c
index 39820834a..544e07ee1 100644
--- a/src/modules/module_24420.c
+++ b/src/modules/module_24420.c
@@ -20,6 +20,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_PRIVATE_KEY;
 static const char *HASH_NAME      = "PKCS#8 Private Keys (PBKDF2-HMAC-SHA256 + 3DES/AES)";
 static const u64   KERN_TYPE      = 24420;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE
diff --git a/src/modules/module_25500.c b/src/modules/module_25500.c
index afc451fb9..06a1e795d 100644
--- a/src/modules/module_25500.c
+++ b/src/modules/module_25500.c
@@ -21,6 +21,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_CRYPTOCURRENCY_WALLET;
 static const char *HASH_NAME      = "Stargazer Stellar Wallet XLM";
 static const u64   KERN_TYPE      = 25500;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE;
diff --git a/src/modules/module_25900.c b/src/modules/module_25900.c
index 7ad951f6d..a460a3bd8 100644
--- a/src/modules/module_25900.c
+++ b/src/modules/module_25900.c
@@ -19,7 +19,8 @@ static const u32   DGST_SIZE      = DGST_SIZE_4_4;
 static const u32   HASH_CATEGORY  = HASH_CATEGORY_NETWORK_SERVER;
 static const char *HASH_NAME      = "KNX IP Secure - Device Authentication Code";
 static const u64   KERN_TYPE      = 25900;
-static const u32   OPTI_TYPE      = OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
+static const u32   OPTI_TYPE      = OPTI_TYPE_SLOW_HASH_SIMD_LOOP
+                                  | OPTI_TYPE_REGISTER_LIMIT;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE
                                   | OPTS_TYPE_DEEP_COMP_KERNEL;
diff --git a/src/modules/module_26000.c b/src/modules/module_26000.c
index 35a6dd937..8ed90d5ee 100644
--- a/src/modules/module_26000.c
+++ b/src/modules/module_26000.c
@@ -20,6 +20,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_PASSWORD_MANAGER;
 static const char *HASH_NAME      = "Mozilla key3.db";
 static const u64   KERN_TYPE      = 26000;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_NOT_ITERATED;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_BE;
diff --git a/src/modules/module_26100.c b/src/modules/module_26100.c
index c42958289..3414fcbdb 100644
--- a/src/modules/module_26100.c
+++ b/src/modules/module_26100.c
@@ -20,6 +20,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_PASSWORD_MANAGER;
 static const char *HASH_NAME      = "Mozilla key4.db";
 static const u64   KERN_TYPE      = 26100;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE;
diff --git a/src/modules/module_26600.c b/src/modules/module_26600.c
index 926555e94..92b5c8efc 100644
--- a/src/modules/module_26600.c
+++ b/src/modules/module_26600.c
@@ -21,6 +21,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_CRYPTOCURRENCY_WALLET;
 static const char *HASH_NAME      = "MetaMask Wallet (needs all data, checks AES-GCM tag)";
 static const u64   KERN_TYPE      = 26600;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE;
diff --git a/src/modules/module_26700.c b/src/modules/module_26700.c
index 63854c0d2..47427ba32 100644
--- a/src/modules/module_26700.c
+++ b/src/modules/module_26700.c
@@ -21,7 +21,8 @@ static const u32   DGST_SIZE      = DGST_SIZE_4_4;
 static const u32   HASH_CATEGORY  = HASH_CATEGORY_NETWORK_PROTOCOL;
 static const char *HASH_NAME      = "SNMPv3 HMAC-SHA224-128";
 static const u64   KERN_TYPE      = 26700;
-static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE;
+static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_NATIVE_THREADS
                                   | OPTS_TYPE_PT_GENERATE_LE;
diff --git a/src/modules/module_26800.c b/src/modules/module_26800.c
index 024bfa636..44d72f7df 100644
--- a/src/modules/module_26800.c
+++ b/src/modules/module_26800.c
@@ -21,7 +21,8 @@ static const u32   DGST_SIZE      = DGST_SIZE_4_6;
 static const u32   HASH_CATEGORY  = HASH_CATEGORY_NETWORK_PROTOCOL;
 static const char *HASH_NAME      = "SNMPv3 HMAC-SHA256-192";
 static const u64   KERN_TYPE      = 26800;
-static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE;
+static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_NATIVE_THREADS
                                   | OPTS_TYPE_PT_GENERATE_LE;
diff --git a/src/modules/module_26900.c b/src/modules/module_26900.c
index 8d916ebf2..29e3cdc92 100644
--- a/src/modules/module_26900.c
+++ b/src/modules/module_26900.c
@@ -22,6 +22,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_NETWORK_PROTOCOL;
 static const char *HASH_NAME      = "SNMPv3 HMAC-SHA384-256";
 static const u64   KERN_TYPE      = 26900;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_USES_BITS_64;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_NATIVE_THREADS
diff --git a/src/modules/module_27300.c b/src/modules/module_27300.c
index ff20acc9d..4d607051e 100644
--- a/src/modules/module_27300.c
+++ b/src/modules/module_27300.c
@@ -22,6 +22,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_NETWORK_PROTOCOL;
 static const char *HASH_NAME      = "SNMPv3 HMAC-SHA512-384";
 static const u64   KERN_TYPE      = 27300;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_USES_BITS_64;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_NATIVE_THREADS
diff --git a/src/modules/module_27400.c b/src/modules/module_27400.c
index 7d9f0fbd4..059b50cb5 100644
--- a/src/modules/module_27400.c
+++ b/src/modules/module_27400.c
@@ -20,6 +20,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_FDE;
 static const char *HASH_NAME      = "VMware VMX (PBKDF2-HMAC-SHA1 + AES-256-CBC)";
 static const u64   KERN_TYPE      = 27400;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE
diff --git a/src/modules/module_27500.c b/src/modules/module_27500.c
index ebabe24e5..6b79eba3a 100644
--- a/src/modules/module_27500.c
+++ b/src/modules/module_27500.c
@@ -22,6 +22,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_FDE;
 static const char *HASH_NAME      = "VirtualBox (PBKDF2-HMAC-SHA256 & AES-128-XTS)";
 static const u64   KERN_TYPE      = 27500;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_SLOW_HASH_SIMD_LOOP
                                   | OPTI_TYPE_SLOW_HASH_SIMD_LOOP2;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
diff --git a/src/modules/module_27600.c b/src/modules/module_27600.c
index b4cdade54..3259ce05f 100644
--- a/src/modules/module_27600.c
+++ b/src/modules/module_27600.c
@@ -22,6 +22,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_FDE;
 static const char *HASH_NAME      = "VirtualBox (PBKDF2-HMAC-SHA256 & AES-256-XTS)";
 static const u64   KERN_TYPE      = 27600;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_SLOW_HASH_SIMD_LOOP
                                   | OPTI_TYPE_SLOW_HASH_SIMD_LOOP2;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
diff --git a/src/modules/module_29451.c b/src/modules/module_29451.c
index c8e10bdcf..fe3c0737f 100644
--- a/src/modules/module_29451.c
+++ b/src/modules/module_29451.c
@@ -23,7 +23,8 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_FDE;
 static const char *HASH_NAME      = "VeraCrypt SHA256 + XTS 512 bit";
 static const u64   KERN_TYPE      = 13751; // old kernel used here
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
-                                  | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
+                                  | OPTI_TYPE_SLOW_HASH_SIMD_LOOP
+                                  | OPTI_TYPE_REGISTER_LIMIT;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE
                                   | OPTS_TYPE_LOOP_EXTENDED
diff --git a/src/modules/module_29452.c b/src/modules/module_29452.c
index 395839f64..842f36f47 100644
--- a/src/modules/module_29452.c
+++ b/src/modules/module_29452.c
@@ -23,7 +23,8 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_FDE;
 static const char *HASH_NAME      = "VeraCrypt SHA256 + XTS 1024 bit";
 static const u64   KERN_TYPE      = 13752; // old kernel used here
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
-                                  | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
+                                  | OPTI_TYPE_SLOW_HASH_SIMD_LOOP
+                                  | OPTI_TYPE_REGISTER_LIMIT;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE
                                   | OPTS_TYPE_LOOP_EXTENDED
diff --git a/src/modules/module_29453.c b/src/modules/module_29453.c
index 97bf9a333..6162337de 100644
--- a/src/modules/module_29453.c
+++ b/src/modules/module_29453.c
@@ -23,7 +23,8 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_FDE;
 static const char *HASH_NAME      = "VeraCrypt SHA256 + XTS 1536 bit";
 static const u64   KERN_TYPE      = 13753; // old kernel used here
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
-                                  | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
+                                  | OPTI_TYPE_SLOW_HASH_SIMD_LOOP
+                                  | OPTI_TYPE_REGISTER_LIMIT;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE
                                   | OPTS_TYPE_LOOP_EXTENDED
diff --git a/src/modules/module_29461.c b/src/modules/module_29461.c
index be7dcfc2e..ff2705da7 100644
--- a/src/modules/module_29461.c
+++ b/src/modules/module_29461.c
@@ -23,7 +23,8 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_FDE;
 static const char *HASH_NAME      = "VeraCrypt SHA256 + XTS 512 bit + boot-mode";
 static const u64   KERN_TYPE      = 13751; // old kernel used here
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
-                                  | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
+                                  | OPTI_TYPE_SLOW_HASH_SIMD_LOOP
+                                  | OPTI_TYPE_REGISTER_LIMIT;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE
                                   | OPTS_TYPE_LOOP_EXTENDED
diff --git a/src/modules/module_29462.c b/src/modules/module_29462.c
index 7e9a6535c..14b53d6ad 100644
--- a/src/modules/module_29462.c
+++ b/src/modules/module_29462.c
@@ -23,7 +23,8 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_FDE;
 static const char *HASH_NAME      = "VeraCrypt SHA256 + XTS 1024 bit + boot-mode";
 static const u64   KERN_TYPE      = 13752; // old kernel used here
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
-                                  | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
+                                  | OPTI_TYPE_SLOW_HASH_SIMD_LOOP
+                                  | OPTI_TYPE_REGISTER_LIMIT;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE
                                   | OPTS_TYPE_LOOP_EXTENDED
diff --git a/src/modules/module_29463.c b/src/modules/module_29463.c
index e98c71289..7a5c8ac92 100644
--- a/src/modules/module_29463.c
+++ b/src/modules/module_29463.c
@@ -23,7 +23,8 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_FDE;
 static const char *HASH_NAME      = "VeraCrypt SHA256 + XTS 1536 bit + boot-mode";
 static const u64   KERN_TYPE      = 13753; // old kernel used here
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
-                                  | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
+                                  | OPTI_TYPE_SLOW_HASH_SIMD_LOOP
+                                  | OPTI_TYPE_REGISTER_LIMIT;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE
                                   | OPTS_TYPE_LOOP_EXTENDED
diff --git a/src/modules/module_70000.c b/src/modules/module_70000.c
index 01504ea09..57bd77d12 100644
--- a/src/modules/module_70000.c
+++ b/src/modules/module_70000.c
@@ -29,7 +29,7 @@ static const u64   BRIDGE_TYPE    = BRIDGE_TYPE_MATCH_TUNINGS // optional - impr
                                   | BRIDGE_TYPE_REPLACE_LOOP;
 static const char *BRIDGE_NAME    = "argon2id_reference";
 static const char *ST_PASS        = "hashcat";
-static const char *ST_HASH        = "$argon2id$v=19$m=4096,t=3,p=1$FoIjFnZlM2JSJWYXUgMFAw$eYKMzhbW8uyT1LLtKRdRcJj2CQeRrdr2pKv/Y71YbAQ";
+static const char *ST_HASH        = "$argon2id$v=19$m=65536,t=3,p=1$FBMjI4RJBhIykCgol1KEJA$2ky5GAdhT1kH4kIgPN/oERE3Taiy43vNN70a3HpiKQU";
 
 u32         module_attack_exec    (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC;     }
 u32         module_dgst_pos0      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0;       }
diff --git a/src/shared.c b/src/shared.c
index a00025352..f7dbcd78d 100644
--- a/src/shared.c
+++ b/src/shared.c
@@ -104,6 +104,9 @@ static const char *const OPTI_STR_USES_BITS_8          = "Uses-8-Bit";
 static const char *const OPTI_STR_USES_BITS_16         = "Uses-16-Bit";
 static const char *const OPTI_STR_USES_BITS_32         = "Uses-32-Bit";
 static const char *const OPTI_STR_USES_BITS_64         = "Uses-64-Bit";
+static const char *const OPTI_STR_SLOW_HASH_DIMY_INIT  = "Slow-Hash-DimensionY-INIT";
+static const char *const OPTI_STR_SLOW_HASH_DIMY_COMP  = "Slow-Hash-DimensionY-COMP";
+static const char *const OPTI_STR_SLOW_HASH_DIMY_LOOP  = "Slow-Hash-DimensionY-LOOP";
 
 static const char *const HASH_CATEGORY_UNDEFINED_STR              = "Undefined";
 static const char *const HASH_CATEGORY_RAW_HASH_STR               = "Raw Hash";
@@ -1072,6 +1075,9 @@ const char *stroptitype (const u32 opti_type)
     case OPTI_TYPE_SLOW_HASH_SIMD_LOOP:  return OPTI_STR_SLOW_HASH_SIMD_LOOP;
     case OPTI_TYPE_SLOW_HASH_SIMD_LOOP2: return OPTI_STR_SLOW_HASH_SIMD_LOOP2;
     case OPTI_TYPE_SLOW_HASH_SIMD_COMP:  return OPTI_STR_SLOW_HASH_SIMD_COMP;
+    case OPTI_TYPE_SLOW_HASH_DIMY_INIT:  return OPTI_STR_SLOW_HASH_DIMY_INIT;
+    case OPTI_TYPE_SLOW_HASH_DIMY_LOOP:  return OPTI_STR_SLOW_HASH_DIMY_LOOP;
+    case OPTI_TYPE_SLOW_HASH_DIMY_COMP:  return OPTI_STR_SLOW_HASH_DIMY_COMP;
     case OPTI_TYPE_USES_BITS_8:          return OPTI_STR_USES_BITS_8;
     case OPTI_TYPE_USES_BITS_16:         return OPTI_STR_USES_BITS_16;
     case OPTI_TYPE_USES_BITS_32:         return OPTI_STR_USES_BITS_32;
diff --git a/tools/test_modules/m70000.pm b/tools/test_modules/m70000.pm
index 4b44334ac..72861ae49 100644
--- a/tools/test_modules/m70000.pm
+++ b/tools/test_modules/m70000.pm
@@ -17,7 +17,7 @@ sub module_generate_hash
 {
   my $word  = shift;
   my $salt  = shift;
-  my $m     = shift // 4096;
+  my $m     = shift // 65536;
   my $t     = shift // 3;
   my $p     = shift // 1;
   my $len   = shift // random_number (1, 2) * 16;

From 78c8180e12bcea14e227bd1b35237afaabea80b6 Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Sun, 29 Jun 2025 15:33:59 +0200
Subject: [PATCH 45/83] make runtime-specific functions static void and
 simplify parameters

---
 src/backend.c | 67 ++++++++++++++++++++-------------------------------
 1 file changed, 26 insertions(+), 41 deletions(-)

diff --git a/src/backend.c b/src/backend.c
index 23baa8b25..9ffd6e2cc 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -5418,7 +5418,7 @@ void backend_ctx_destroy (hashcat_ctx_t *hashcat_ctx)
   memset (backend_ctx, 0, sizeof (backend_ctx_t));
 }
 
-int backend_ctx_devices_init_cuda (hashcat_ctx_t *hashcat_ctx, bool is_virtualized, int virtmulti, int *virthost, int *virthost_finder, int *backend_devices_idx, int *bridge_link_device, bool *need_nvml, MAYBE_UNUSED bool *need_nvapi)
+static void backend_ctx_devices_init_cuda (hashcat_ctx_t *hashcat_ctx, bool is_virtualized, int virtmulti, int *virthost, int *virthost_finder, int *backend_devices_idx, int *bridge_link_device)
 {
   backend_ctx_t     *backend_ctx   = hashcat_ctx->backend_ctx;
   user_options_t    *user_options  = hashcat_ctx->user_options;
@@ -5767,10 +5767,10 @@ int backend_ctx_devices_init_cuda (hashcat_ctx_t *hashcat_ctx, bool is_virtualiz
 
       if ((device_param->opencl_platform_vendor_id == VENDOR_ID_NV) && (device_param->opencl_device_vendor_id == VENDOR_ID_NV))
       {
-        *need_nvml = true;
+        backend_ctx->need_nvml = true;
 
         #if defined (_WIN) || defined (__CYGWIN__)
-        *need_nvapi = true;
+        backend_ctx->need_nvapi = true;
         #endif
       }
 
@@ -5891,11 +5891,9 @@ int backend_ctx_devices_init_cuda (hashcat_ctx_t *hashcat_ctx, bool is_virtualiz
 
   backend_ctx->cuda_devices_cnt     = cuda_devices_cnt;
   backend_ctx->cuda_devices_active  = cuda_devices_active;
-
-  return 0;
 }
 
-int backend_ctx_devices_init_hip (hashcat_ctx_t *hashcat_ctx, bool is_virtualized, int virtmulti, int *virthost, int *virthost_finder, int *backend_devices_idx, int *bridge_link_device, bool *need_adl, MAYBE_UNUSED bool *need_sysfs_amdgpu)
+static void backend_ctx_devices_init_hip (hashcat_ctx_t *hashcat_ctx, bool is_virtualized, int virtmulti, int *virthost, int *virthost_finder, int *backend_devices_idx, int *bridge_link_device)
 {
   #if defined (__linux__)
   const folder_config_t *folder_config = hashcat_ctx->folder_config;
@@ -6262,10 +6260,10 @@ int backend_ctx_devices_init_hip (hashcat_ctx_t *hashcat_ctx, bool is_virtualize
 
       if ((device_param->opencl_platform_vendor_id == VENDOR_ID_AMD_USE_HIP) && (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP))
       {
-         *need_adl = true;
+         backend_ctx->need_adl = true;
 
          #if defined (__linux__)
-         *need_sysfs_amdgpu = true;
+         backend_ctx->need_sysfs_amdgpu = true;
          #endif
       }
 
@@ -6400,11 +6398,9 @@ int backend_ctx_devices_init_hip (hashcat_ctx_t *hashcat_ctx, bool is_virtualize
 
   backend_ctx->hip_devices_cnt     = hip_devices_cnt;
   backend_ctx->hip_devices_active  = hip_devices_active;
-
-  return 0;
 }
 
-int backend_ctx_devices_init_metal (hashcat_ctx_t *hashcat_ctx, MAYBE_UNUSED bool is_virtualized, MAYBE_UNUSED int virtmulti, MAYBE_UNUSED int *virthost, MAYBE_UNUSED int *virthost_finder, MAYBE_UNUSED int *backend_devices_idx, MAYBE_UNUSED int *bridge_link_device, MAYBE_UNUSED bool *need_iokit)
+static void backend_ctx_devices_init_metal (hashcat_ctx_t *hashcat_ctx, MAYBE_UNUSED bool is_virtualized, MAYBE_UNUSED int virtmulti, MAYBE_UNUSED int *virthost, MAYBE_UNUSED int *virthost_finder, MAYBE_UNUSED int *backend_devices_idx, MAYBE_UNUSED int *bridge_link_device)
 {
   backend_ctx_t     *backend_ctx   = hashcat_ctx->backend_ctx;
 
@@ -6783,7 +6779,7 @@ int backend_ctx_devices_init_metal (hashcat_ctx_t *hashcat_ctx, MAYBE_UNUSED boo
 
       if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_vendor_id == VENDOR_ID_APPLE))
       {
-        *need_iokit = true;
+        backend_ctx->need_iokit = true;
       }
 
       // CPU burning loop damper
@@ -6835,11 +6831,9 @@ int backend_ctx_devices_init_metal (hashcat_ctx_t *hashcat_ctx, MAYBE_UNUSED boo
 
   backend_ctx->metal_devices_cnt     = metal_devices_cnt;
   backend_ctx->metal_devices_active  = metal_devices_active;
-
-  return 0;
 }
 
-int backend_ctx_devices_init_opencl (hashcat_ctx_t *hashcat_ctx, bool is_virtualized, int virtmulti, int *virthost, int *virthost_finder, int *backend_devices_idx, int *bridge_link_device, bool *need_nvml, MAYBE_UNUSED bool *need_nvapi, bool *need_adl, MAYBE_UNUSED bool *need_sysfs_amdgpu, MAYBE_UNUSED bool *need_sysfs_cpu, MAYBE_UNUSED bool *need_iokit)
+static void backend_ctx_devices_init_opencl (hashcat_ctx_t *hashcat_ctx, bool is_virtualized, int virtmulti, int *virthost, int *virthost_finder, int *backend_devices_idx, int *bridge_link_device)
 {
   const folder_config_t *folder_config = hashcat_ctx->folder_config;
   backend_ctx_t         *backend_ctx   = hashcat_ctx->backend_ctx;
@@ -7691,12 +7685,12 @@ int backend_ctx_devices_init_opencl (hashcat_ctx_t *hashcat_ctx, bool is_virtual
           #if defined (__APPLE__)
           if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE)
           {
-            *need_iokit = true;
+            backend_ctx->need_iokit = true;
           }
           #endif
 
           #if defined (__linux__)
-          *need_sysfs_cpu = true;
+          backend_ctx->need_sysfs_cpu = true;
           #endif
         }
 
@@ -7704,19 +7698,19 @@ int backend_ctx_devices_init_opencl (hashcat_ctx_t *hashcat_ctx, bool is_virtual
         {
           if ((device_param->opencl_platform_vendor_id == VENDOR_ID_AMD) && (device_param->opencl_device_vendor_id == VENDOR_ID_AMD))
           {
-            *need_adl = true;
+            backend_ctx->need_adl = true;
 
             #if defined (__linux__)
-            *need_sysfs_amdgpu = true;
+            backend_ctx->need_sysfs_amdgpu = true;
             #endif
           }
 
           if ((device_param->opencl_platform_vendor_id == VENDOR_ID_NV) && (device_param->opencl_device_vendor_id == VENDOR_ID_NV))
           {
-            *need_nvml = true;
+            backend_ctx->need_nvml = true;
 
             #if defined (_WIN) || defined (__CYGWIN__)
-            *need_nvapi = true;
+            backend_ctx->need_nvapi = true;
             #endif
           }
 
@@ -7725,7 +7719,7 @@ int backend_ctx_devices_init_opencl (hashcat_ctx_t *hashcat_ctx, bool is_virtual
           {
             if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE)
             {
-              *need_iokit = true;
+              backend_ctx->need_iokit = true;
             }
           }
           #endif
@@ -8244,8 +8238,6 @@ int backend_ctx_devices_init_opencl (hashcat_ctx_t *hashcat_ctx, bool is_virtual
 
   backend_ctx->opencl_devices_cnt     = opencl_devices_cnt;
   backend_ctx->opencl_devices_active  = opencl_devices_active;
-
-  return 0;
 }
 
 int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
@@ -8258,12 +8250,12 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
   hc_device_param_t *devices_param = backend_ctx->devices_param;
 
-  bool need_adl           = false;
-  bool need_nvml          = false;
-  bool need_nvapi         = false;
-  bool need_sysfs_amdgpu  = false;
-  bool need_sysfs_cpu     = false;
-  bool need_iokit         = false;
+  backend_ctx->need_adl           = false;
+  backend_ctx->need_nvml          = false;
+  backend_ctx->need_nvapi         = false;
+  backend_ctx->need_sysfs_amdgpu  = false;
+  backend_ctx->need_sysfs_cpu     = false;
+  backend_ctx->need_iokit         = false;
 
   int bridge_link_device = 0; // this will only count active device
 
@@ -8278,19 +8270,19 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
   // CUDA
 
-  backend_ctx_devices_init_cuda (hashcat_ctx, is_virtualized, virtmulti, &virthost, &virthost_finder, &backend_devices_idx, &bridge_link_device, &need_nvml, &need_nvapi);
+  backend_ctx_devices_init_cuda (hashcat_ctx, is_virtualized, virtmulti, &virthost, &virthost_finder, &backend_devices_idx, &bridge_link_device);
 
   // HIP
 
-  backend_ctx_devices_init_hip (hashcat_ctx, is_virtualized, virtmulti, &virthost, &virthost_finder, &backend_devices_idx, &bridge_link_device, &need_adl, &need_sysfs_amdgpu);
+  backend_ctx_devices_init_hip (hashcat_ctx, is_virtualized, virtmulti, &virthost, &virthost_finder, &backend_devices_idx, &bridge_link_device);
 
   // Metal
 
-  backend_ctx_devices_init_metal (hashcat_ctx, is_virtualized, virtmulti, &virthost, &virthost_finder, &backend_devices_idx, &bridge_link_device, &need_iokit);
+  backend_ctx_devices_init_metal (hashcat_ctx, is_virtualized, virtmulti, &virthost, &virthost_finder, &backend_devices_idx, &bridge_link_device);
 
   // OCL
 
-  backend_ctx_devices_init_opencl (hashcat_ctx, is_virtualized, virtmulti, &virthost, &virthost_finder, &backend_devices_idx, &bridge_link_device, &need_nvml, &need_nvapi, &need_adl, &need_sysfs_amdgpu, &need_sysfs_cpu, &need_iokit);
+  backend_ctx_devices_init_opencl (hashcat_ctx, is_virtualized, virtmulti, &virthost, &virthost_finder, &backend_devices_idx, &bridge_link_device);
 
   // all devices combined go into backend_* variables
 
@@ -8908,13 +8900,6 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
   backend_ctx->target_msec  = TARGET_MSEC_PROFILE[user_options->workload_profile - 1];
 
-  backend_ctx->need_adl           = need_adl;
-  backend_ctx->need_nvml          = need_nvml;
-  backend_ctx->need_nvapi         = need_nvapi;
-  backend_ctx->need_sysfs_amdgpu  = need_sysfs_amdgpu;
-  backend_ctx->need_sysfs_cpu     = need_sysfs_cpu;
-  backend_ctx->need_iokit         = need_iokit;
-
   backend_ctx->comptime = comptime;
 
   return 0;

From ca7111996863cbc0552c3a9e016cecfa20480f8f Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Sun, 29 Jun 2025 16:59:39 +0200
Subject: [PATCH 46/83] Selftest: rename selftest function to process_selftest
 and split it into 3 smaller functions

---
 docs/changes.txt |  1 +
 src/selftest.c   | 72 ++++++++++++++++++++++++++++++++++--------------
 2 files changed, 53 insertions(+), 20 deletions(-)

diff --git a/docs/changes.txt b/docs/changes.txt
index 7e15685c3..f4dc61531 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -141,6 +141,7 @@
 - Modules: Updated module_unstable_warning
 - Open Document Format: Added support for small documents with content length < 1024
 - OpenCL Backend: added workaround to set device_available_memory from CUDA/HIP alias device
+- Selftest: rename selftest function to process_selftest and split it into 3 smaller functions
 - Status Code: Add specific return code for self-test fail (-11)
 - Scrypt: Increase buffer sizes in module for hash mode 8900 to allow longer scrypt digests
 - Unicode: Update UTF-8 to UTF-16 conversion to match RFC 3629
diff --git a/src/selftest.c b/src/selftest.c
index f1b2b2dd5..aea6f3128 100644
--- a/src/selftest.c
+++ b/src/selftest.c
@@ -12,18 +12,14 @@
 #include "thread.h"
 #include "selftest.h"
 
-static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
+static int selftest_init (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, u32 *highest_pw_len)
 {
-  bridge_ctx_t         *bridge_ctx         = hashcat_ctx->bridge_ctx;
-  hashconfig_t         *hashconfig         = hashcat_ctx->hashconfig;
   hashes_t             *hashes             = hashcat_ctx->hashes;
   module_ctx_t         *module_ctx         = hashcat_ctx->module_ctx;
-  status_ctx_t         *status_ctx         = hashcat_ctx->status_ctx;
+  hashconfig_t         *hashconfig         = hashcat_ctx->hashconfig;
   user_options_t       *user_options       = hashcat_ctx->user_options;
   user_options_extra_t *user_options_extra = hashcat_ctx->user_options_extra;
 
-  if (hashconfig->st_hash == NULL) return 0;
-
   // init : replace hashes with selftest hash
 
   if (device_param->is_cuda == true)
@@ -86,8 +82,6 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
   pw_t comb;
   bf_t bf;
 
-  u32 highest_pw_len = 0;
-
   if (user_options->slow_candidates == true)
   {
     if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
@@ -460,7 +454,7 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
             if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_FALSE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL) == -1) return -1;
           }
 
-          highest_pw_len = pw.pw_len;
+          *highest_pw_len = pw.pw_len;
         }
       }
     }
@@ -500,6 +494,16 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
     }
   }
 
+  return 0;
+}
+
+static int selftest_run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, u32 highest_pw_len)
+{
+  bridge_ctx_t *bridge_ctx = hashcat_ctx->bridge_ctx;
+  hashconfig_t *hashconfig = hashcat_ctx->hashconfig;
+  hashes_t     *hashes     = hashcat_ctx->hashes;
+  module_ctx_t *module_ctx = hashcat_ctx->module_ctx;
+
   // main : run the kernel
 
   const u32 kernel_threads_sav = device_param->kernel_threads;
@@ -933,22 +937,28 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
   device_param->kernel_threads = kernel_threads_sav;
 
-  // check : check if cracked
+  return 0;
+}
 
-  u32 num_cracked = 0;
+static int selftest_cleanup (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, u32 *num_cracked)
+{
+  user_options_t       *user_options       = hashcat_ctx->user_options;
+  user_options_extra_t *user_options_extra = hashcat_ctx->user_options_extra;
+
+  // check : check if cracked
 
   cl_event opencl_event;
 
   if (device_param->is_cuda == true)
   {
-    if (hc_cuMemcpyDtoHAsync (hashcat_ctx, &num_cracked, device_param->cuda_d_result, sizeof (u32), device_param->cuda_stream) == -1) return -1;
+    if (hc_cuMemcpyDtoHAsync (hashcat_ctx, num_cracked, device_param->cuda_d_result, sizeof (u32), device_param->cuda_stream) == -1) return -1;
 
     if (hc_cuEventRecord (hashcat_ctx, device_param->cuda_event3, device_param->cuda_stream) == -1) return -1;
   }
 
   if (device_param->is_hip == true)
   {
-    if (hc_hipMemcpyDtoHAsync (hashcat_ctx, &num_cracked, device_param->hip_d_result, sizeof (u32), device_param->hip_stream) == -1) return -1;
+    if (hc_hipMemcpyDtoHAsync (hashcat_ctx, num_cracked, device_param->hip_d_result, sizeof (u32), device_param->hip_stream) == -1) return -1;
 
     if (hc_hipEventRecord (hashcat_ctx, device_param->hip_event3, device_param->hip_stream) == -1) return -1;
   }
@@ -956,13 +966,13 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
   #if defined (__APPLE__)
   if (device_param->is_metal == true)
   {
-    if (hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, &num_cracked, device_param->metal_d_result, 0, sizeof (u32)) == -1) return -1;
+    if (hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, num_cracked, device_param->metal_d_result, 0, sizeof (u32)) == -1) return -1;
   }
   #endif
 
   if (device_param->is_opencl == true)
   {
-    if (hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_result, CL_FALSE, 0, sizeof (u32), &num_cracked, 0, NULL, &opencl_event) == -1) return -1;
+    if (hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_result, CL_FALSE, 0, sizeof (u32), num_cracked, 0, NULL, &opencl_event) == -1) return -1;
 
     if (hc_clFlush (hashcat_ctx, device_param->opencl_command_queue) == -1) return -1;
   }
@@ -1154,7 +1164,27 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
     if (hc_clReleaseEvent (hashcat_ctx, opencl_event) == -1) return -1;
   }
 
+  return 0;
+}
+
+static int process_selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
+{
+  hashconfig_t *hashconfig = hashcat_ctx->hashconfig;
+  status_ctx_t *status_ctx = hashcat_ctx->status_ctx;
+
+  if (hashconfig->st_hash == NULL) return 0;
+
+  u32 highest_pw_len = 0;
+  u32 num_cracked = 0;
+
+  if (selftest_init (hashcat_ctx, device_param, &highest_pw_len) == -1) return -1;
+
+  if (selftest_run_kernel (hashcat_ctx, device_param, highest_pw_len) == -1) return -1;
+
+  if (selftest_cleanup (hashcat_ctx, device_param, &num_cracked) == -1) return -1;
+
   // check return
+
   if (num_cracked == 0)
   {
     hc_thread_mutex_lock (status_ctx->mux_display);
@@ -1169,20 +1199,22 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
       event_log_error (hashcat_ctx, "* Device #%u: ATTENTION! HIP kernel self-test failed.", device_param->device_id + 1);
     }
 
-    #if defined (__APPLE__)
     if (device_param->is_metal == true)
     {
       event_log_error (hashcat_ctx, "* Device #%u: ATTENTION! Metal kernel self-test failed.", device_param->device_id + 1);
     }
-    #endif
 
     if (device_param->is_opencl == true)
     {
       event_log_error (hashcat_ctx, "* Device #%u: ATTENTION! OpenCL kernel self-test failed.", device_param->device_id + 1);
     }
 
-    event_log_warning (hashcat_ctx, "Your device driver installation is probably broken.");
-    event_log_warning (hashcat_ctx, "See also: https://hashcat.net/faq/wrongdriver");
+    if (device_param->is_metal == false)
+    {
+      event_log_warning (hashcat_ctx, "Your device driver installation is probably broken.");
+      event_log_warning (hashcat_ctx, "See also: https://hashcat.net/faq/wrongdriver");
+    }
+
     event_log_warning (hashcat_ctx, NULL);
 
     hc_thread_mutex_unlock (status_ctx->mux_display);
@@ -1232,7 +1264,7 @@ HC_API_CALL void *thread_selftest (void *p)
     if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return NULL;
   }
 
-  const int rc_selftest = selftest (hashcat_ctx, device_param);
+  const int rc_selftest = process_selftest (hashcat_ctx, device_param);
 
   if (user_options->benchmark == true)
   {

From 907e58c27dd5c9c4474d97afd7430d24fb860f18 Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Sun, 29 Jun 2025 18:26:17 +0200
Subject: [PATCH 47/83] move is_virtualized and virtmulti into runtime-specific
 functions to simplify parameters

---
 src/backend.c | 66 ++++++++++++++++++++++++++++++++-------------------
 1 file changed, 41 insertions(+), 25 deletions(-)

diff --git a/src/backend.c b/src/backend.c
index 9ffd6e2cc..f65b2959e 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -5418,12 +5418,17 @@ void backend_ctx_destroy (hashcat_ctx_t *hashcat_ctx)
   memset (backend_ctx, 0, sizeof (backend_ctx_t));
 }
 
-static void backend_ctx_devices_init_cuda (hashcat_ctx_t *hashcat_ctx, bool is_virtualized, int virtmulti, int *virthost, int *virthost_finder, int *backend_devices_idx, int *bridge_link_device)
+static void backend_ctx_devices_init_cuda (hashcat_ctx_t *hashcat_ctx, int *virthost, int *virthost_finder, int *backend_devices_idx, int *bridge_link_device)
 {
-  backend_ctx_t     *backend_ctx   = hashcat_ctx->backend_ctx;
-  user_options_t    *user_options  = hashcat_ctx->user_options;
+  const bridge_ctx_t   *bridge_ctx    = hashcat_ctx->bridge_ctx;
+        backend_ctx_t  *backend_ctx   = hashcat_ctx->backend_ctx;
+        user_options_t *user_options  = hashcat_ctx->user_options;
 
-  hc_device_param_t *devices_param = backend_ctx->devices_param;
+  hc_device_param_t    *devices_param = backend_ctx->devices_param;
+
+  bool is_virtualized = ((user_options->backend_devices_virtmulti > 1) || (bridge_ctx->enabled == true)) ? true : false;
+
+  int virtmulti = (bridge_ctx->enabled == true) ? bridge_ctx->get_unit_count (bridge_ctx->platform_context) : (int) user_options->backend_devices_virtmulti;
 
   int cuda_devices_cnt    = 0;
   int cuda_devices_active = 0;
@@ -5893,16 +5898,21 @@ static void backend_ctx_devices_init_cuda (hashcat_ctx_t *hashcat_ctx, bool is_v
   backend_ctx->cuda_devices_active  = cuda_devices_active;
 }
 
-static void backend_ctx_devices_init_hip (hashcat_ctx_t *hashcat_ctx, bool is_virtualized, int virtmulti, int *virthost, int *virthost_finder, int *backend_devices_idx, int *bridge_link_device)
+static void backend_ctx_devices_init_hip (hashcat_ctx_t *hashcat_ctx, int *virthost, int *virthost_finder, int *backend_devices_idx, int *bridge_link_device)
 {
   #if defined (__linux__)
   const folder_config_t *folder_config = hashcat_ctx->folder_config;
   #endif
-  backend_ctx_t         *backend_ctx   = hashcat_ctx->backend_ctx;
-  user_options_t        *user_options  = hashcat_ctx->user_options;
+  const bridge_ctx_t    *bridge_ctx    = hashcat_ctx->bridge_ctx;
+        backend_ctx_t   *backend_ctx   = hashcat_ctx->backend_ctx;
+        user_options_t  *user_options  = hashcat_ctx->user_options;
 
   hc_device_param_t     *devices_param = backend_ctx->devices_param;
 
+  bool is_virtualized = ((user_options->backend_devices_virtmulti > 1) || (bridge_ctx->enabled == true)) ? true : false;
+
+  int virtmulti = (bridge_ctx->enabled == true) ? bridge_ctx->get_unit_count (bridge_ctx->platform_context) : (int) user_options->backend_devices_virtmulti;
+
   int hip_devices_cnt    = 0;
   int hip_devices_active = 0;
 
@@ -6400,15 +6410,21 @@ static void backend_ctx_devices_init_hip (hashcat_ctx_t *hashcat_ctx, bool is_vi
   backend_ctx->hip_devices_active  = hip_devices_active;
 }
 
-static void backend_ctx_devices_init_metal (hashcat_ctx_t *hashcat_ctx, MAYBE_UNUSED bool is_virtualized, MAYBE_UNUSED int virtmulti, MAYBE_UNUSED int *virthost, MAYBE_UNUSED int *virthost_finder, MAYBE_UNUSED int *backend_devices_idx, MAYBE_UNUSED int *bridge_link_device)
+static void backend_ctx_devices_init_metal (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MAYBE_UNUSED int *virthost, MAYBE_UNUSED int *virthost_finder, MAYBE_UNUSED int *backend_devices_idx, MAYBE_UNUSED int *bridge_link_device)
 {
-  backend_ctx_t     *backend_ctx   = hashcat_ctx->backend_ctx;
-
   int metal_devices_cnt    = 0;
   int metal_devices_active = 0;
 
   #if defined (__APPLE__)
-  hc_device_param_t *devices_param = backend_ctx->devices_param;
+  const bridge_ctx_t    *bridge_ctx    = hashcat_ctx->bridge_ctx;
+        backend_ctx_t   *backend_ctx   = hashcat_ctx->backend_ctx;
+        user_options_t  *user_options  = hashcat_ctx->user_options;
+
+  hc_device_param_t     *devices_param = backend_ctx->devices_param;
+
+  bool is_virtualized = ((user_options->backend_devices_virtmulti > 1) || (bridge_ctx->enabled == true)) ? true : false;
+
+  int virtmulti = (bridge_ctx->enabled == true) ? bridge_ctx->get_unit_count (bridge_ctx->platform_context) : (int) user_options->backend_devices_virtmulti;
 
   if (backend_ctx->mtl)
   {
@@ -6833,14 +6849,19 @@ static void backend_ctx_devices_init_metal (hashcat_ctx_t *hashcat_ctx, MAYBE_UN
   backend_ctx->metal_devices_active  = metal_devices_active;
 }
 
-static void backend_ctx_devices_init_opencl (hashcat_ctx_t *hashcat_ctx, bool is_virtualized, int virtmulti, int *virthost, int *virthost_finder, int *backend_devices_idx, int *bridge_link_device)
+static void backend_ctx_devices_init_opencl (hashcat_ctx_t *hashcat_ctx, int *virthost, int *virthost_finder, int *backend_devices_idx, int *bridge_link_device)
 {
   const folder_config_t *folder_config = hashcat_ctx->folder_config;
-  backend_ctx_t         *backend_ctx   = hashcat_ctx->backend_ctx;
-  user_options_t        *user_options  = hashcat_ctx->user_options;
+  const bridge_ctx_t    *bridge_ctx    = hashcat_ctx->bridge_ctx;
+        backend_ctx_t   *backend_ctx   = hashcat_ctx->backend_ctx;
+        user_options_t  *user_options  = hashcat_ctx->user_options;
 
   hc_device_param_t     *devices_param = backend_ctx->devices_param;
 
+  bool is_virtualized = ((user_options->backend_devices_virtmulti > 1) || (bridge_ctx->enabled == true)) ? true : false;
+
+  int virtmulti = (bridge_ctx->enabled == true) ? bridge_ctx->get_unit_count (bridge_ctx->platform_context) : (int) user_options->backend_devices_virtmulti;
+
   int opencl_devices_cnt    = 0;
   int opencl_devices_active = 0;
 
@@ -8242,12 +8263,11 @@ static void backend_ctx_devices_init_opencl (hashcat_ctx_t *hashcat_ctx, bool is
 
 int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 {
-  const bridge_ctx_t    *bridge_ctx    = hashcat_ctx->bridge_ctx;
-        backend_ctx_t   *backend_ctx   = hashcat_ctx->backend_ctx;
-        user_options_t  *user_options  = hashcat_ctx->user_options;
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
 
   if (backend_ctx->enabled == false) return 0;
 
+  user_options_t    *user_options  = hashcat_ctx->user_options;
   hc_device_param_t *devices_param = backend_ctx->devices_param;
 
   backend_ctx->need_adl           = false;
@@ -8261,28 +8281,24 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
   int backend_devices_idx = 0; // this will not only count active devices
 
-  bool is_virtualized = ((user_options->backend_devices_virtmulti > 1) || (bridge_ctx->enabled == true)) ? true : false;
-
-  int virtmulti = (bridge_ctx->enabled == true) ? bridge_ctx->get_unit_count (bridge_ctx->platform_context) : (int) user_options->backend_devices_virtmulti;
-
   int virthost = -1;
   int virthost_finder = user_options->backend_devices_virthost;
 
   // CUDA
 
-  backend_ctx_devices_init_cuda (hashcat_ctx, is_virtualized, virtmulti, &virthost, &virthost_finder, &backend_devices_idx, &bridge_link_device);
+  backend_ctx_devices_init_cuda (hashcat_ctx, &virthost, &virthost_finder, &backend_devices_idx, &bridge_link_device);
 
   // HIP
 
-  backend_ctx_devices_init_hip (hashcat_ctx, is_virtualized, virtmulti, &virthost, &virthost_finder, &backend_devices_idx, &bridge_link_device);
+  backend_ctx_devices_init_hip (hashcat_ctx, &virthost, &virthost_finder, &backend_devices_idx, &bridge_link_device);
 
   // Metal
 
-  backend_ctx_devices_init_metal (hashcat_ctx, is_virtualized, virtmulti, &virthost, &virthost_finder, &backend_devices_idx, &bridge_link_device);
+  backend_ctx_devices_init_metal (hashcat_ctx, &virthost, &virthost_finder, &backend_devices_idx, &bridge_link_device);
 
   // OCL
 
-  backend_ctx_devices_init_opencl (hashcat_ctx, is_virtualized, virtmulti, &virthost, &virthost_finder, &backend_devices_idx, &bridge_link_device);
+  backend_ctx_devices_init_opencl (hashcat_ctx, &virthost, &virthost_finder, &backend_devices_idx, &bridge_link_device);
 
   // all devices combined go into backend_* variables
 

From 0830bc4b9ffe123696f80490c84bfef646676e2a Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Sun, 29 Jun 2025 18:29:39 +0200
Subject: [PATCH 48/83] Set backend_ctx before the preprocessor conditional in
 backend_ctx_devices_init_metal

---
 src/backend.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/backend.c b/src/backend.c
index f65b2959e..1820359e1 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -6410,14 +6410,15 @@ static void backend_ctx_devices_init_hip (hashcat_ctx_t *hashcat_ctx, int *virth
   backend_ctx->hip_devices_active  = hip_devices_active;
 }
 
-static void backend_ctx_devices_init_metal (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MAYBE_UNUSED int *virthost, MAYBE_UNUSED int *virthost_finder, MAYBE_UNUSED int *backend_devices_idx, MAYBE_UNUSED int *bridge_link_device)
+static void backend_ctx_devices_init_metal (hashcat_ctx_t *hashcat_ctx, MAYBE_UNUSED int *virthost, MAYBE_UNUSED int *virthost_finder, MAYBE_UNUSED int *backend_devices_idx, MAYBE_UNUSED int *bridge_link_device)
 {
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+
   int metal_devices_cnt    = 0;
   int metal_devices_active = 0;
 
   #if defined (__APPLE__)
   const bridge_ctx_t    *bridge_ctx    = hashcat_ctx->bridge_ctx;
-        backend_ctx_t   *backend_ctx   = hashcat_ctx->backend_ctx;
         user_options_t  *user_options  = hashcat_ctx->user_options;
 
   hc_device_param_t     *devices_param = backend_ctx->devices_param;

From 161f00b9debd74977a0fa09eff8e035118eea3f2 Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Sun, 29 Jun 2025 19:07:33 +0200
Subject: [PATCH 49/83] Documents: Renamed status_codes.txt to
 exit_status_code.txt and added device_status_code.txt

---
 docs/changes.txt                              |  1 +
 docs/device_status_code.txt                   | 20 +++++++++++++++++++
 ...{status_codes.txt => exit_status_code.txt} |  0
 3 files changed, 21 insertions(+)
 create mode 100644 docs/device_status_code.txt
 rename docs/{status_codes.txt => exit_status_code.txt} (100%)

diff --git a/docs/changes.txt b/docs/changes.txt
index f4dc61531..b36c1e3ff 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -136,6 +136,7 @@
 - Dependencies: Updated xxHash to 0.8.3 (commit 50f4226)
 - Building: Support building windows binaries on macOS using MinGW
 - Dependencies: Updated OpenCL-Headers to v2024.10.24 (commit 265df85)
+- Documents: Renamed status_codes.txt to exit_status_code.txt and added device_status_code.txt
 - Documents: Updated BUILD.md and added BUILD_macOS.md (containing instructions for building windows binaries on macOS)
 - Modules: Added support for non-zero IVs for -m 6800 (Lastpass). Also added `tools/lastpass2hashcat.py`
 - Modules: Updated module_unstable_warning
diff --git a/docs/device_status_code.txt b/docs/device_status_code.txt
new file mode 100644
index 000000000..bd1607409
--- /dev/null
+++ b/docs/device_status_code.txt
@@ -0,0 +1,20 @@
+Device Status Codes:
+====================
+
+ 0 = "Initializing"
+ 1 = "Autotuning"
+ 2 = "Selftest"
+ 3 = "Running"
+ 4 = "Paused"
+ 5 = "Exhausted"
+ 6 = "Cracked"
+ 7 = "Aborted"
+ 8 = "Quit"
+ 9 = "Bypass"
+10 = "Aborted (Checkpoint)"
+11 = "Aborted (Runtime)"
+12 = "Running (Checkpoint Quit requested)"
+13 = "Error"
+14 = "Aborted (Finish)"
+15 = "Running (Quit after attack requested)"
+16 = "Autodetect"
diff --git a/docs/status_codes.txt b/docs/exit_status_code.txt
similarity index 100%
rename from docs/status_codes.txt
rename to docs/exit_status_code.txt

From 1978231c88a095dd1cf22d37fcdc74ee2aa43249 Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Sun, 29 Jun 2025 22:51:08 +0200
Subject: [PATCH 50/83] - Fixed bug in Hardware Monitor: prevent disabling if
 ADL fails - Hardware Monitor: Splitting hwmon_ctx_init function into smaller
 library-specific functions

---
 docs/changes.txt |   2 +
 src/hwmon.c      | 481 +++++++++++++++++++++++++----------------------
 2 files changed, 259 insertions(+), 224 deletions(-)

diff --git a/docs/changes.txt b/docs/changes.txt
index f4dc61531..4039eae51 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -92,6 +92,7 @@
 - Fixed bug in grep out-of-memory workaround on Unit Test
 - Fixed bug in input_tokenizer when TOKEN_ATTR_FIXED_LENGTH is used and refactor modules
 - Fixed bug in --stdout that caused certain rules to malfunction
+- Fixed bug in Hardware Monitor: prevent disabling if ADL fails
 - Fixed build failed for 10700 optimized with Apple Metal
 - Fixed build failed for 13772 and 13773 with Apple Metal
 - Fixed build failed for 18400 with Apple Metal
@@ -155,6 +156,7 @@
 - Modules: Check UnpackSize to raise false positive with hc_decompress_rar
 - User Options: added --metal-compiler-runtime option
 - Hardware Monitor: avoid sprintf in src/ext_iokit.c
+- Hardware Monitor: Splitting hwmon_ctx_init function into smaller library-specific functions
 - Help: show supported hash-modes only with -hh
 - Makefile: prevent make failure with Apple Silicon in case of partial rebuild
 - Rules: Rename best64.rule to best66.rule and remove the unknown section from it
diff --git a/src/hwmon.c b/src/hwmon.c
index 4f5264b3d..e32b261fd 100644
--- a/src/hwmon.c
+++ b/src/hwmon.c
@@ -1268,142 +1268,10 @@ u64 hm_get_memoryused_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int ba
   return 0;
 }
 
-int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
+static void hwmon_ctx_init_nvml (hashcat_ctx_t *hashcat_ctx, hm_attrs_t *hm_adapters_nvml, int backend_devices_cnt)
 {
-  bridge_ctx_t   *bridge_ctx   = hashcat_ctx->bridge_ctx;
-  hwmon_ctx_t    *hwmon_ctx    = hashcat_ctx->hwmon_ctx;
-  backend_ctx_t  *backend_ctx  = hashcat_ctx->backend_ctx;
-  user_options_t *user_options = hashcat_ctx->user_options;
-
-  hwmon_ctx->enabled = false;
-
-  int backend_devices_cnt = backend_ctx->backend_devices_cnt;
-
-  if (bridge_ctx->enabled == true) backend_devices_cnt = 1;
-
-  //#if !defined (WITH_HWMON)
-  //return 0;
-  //#endif // WITH_HWMON
-
-  if (user_options->usage          > 0)     return 0;
-  //if (user_options->backend_info   > 0)     return 0;
-
-  if (user_options->hash_info     == true)  return 0;
-  if (user_options->keyspace      == true)  return 0;
-  if (user_options->left          == true)  return 0;
-  if (user_options->show          == true)  return 0;
-  if (user_options->stdout_flag   == true)  return 0;
-  if (user_options->version       == true)  return 0;
-  if (user_options->identify      == true)  return 0;
-  //we need hwmon support to get free memory per device support
-  //its a joke, but there's no way around
-  //if (user_options->hwmon         == false) return 0;
-
-  hwmon_ctx->hm_device = (hm_attrs_t *) hccalloc (DEVICES_MAX, sizeof (hm_attrs_t));
-
-  /**
-   * Initialize shared libraries
-   */
-
-  hm_attrs_t *hm_adapters_adl           = (hm_attrs_t *) hccalloc (DEVICES_MAX, sizeof (hm_attrs_t));
-  hm_attrs_t *hm_adapters_nvapi         = (hm_attrs_t *) hccalloc (DEVICES_MAX, sizeof (hm_attrs_t));
-  hm_attrs_t *hm_adapters_nvml          = (hm_attrs_t *) hccalloc (DEVICES_MAX, sizeof (hm_attrs_t));
-  hm_attrs_t *hm_adapters_sysfs_amdgpu  = (hm_attrs_t *) hccalloc (DEVICES_MAX, sizeof (hm_attrs_t));
-  hm_attrs_t *hm_adapters_sysfs_cpu     = (hm_attrs_t *) hccalloc (DEVICES_MAX, sizeof (hm_attrs_t));
-  hm_attrs_t *hm_adapters_iokit         = (hm_attrs_t *) hccalloc (DEVICES_MAX, sizeof (hm_attrs_t));
-
-  #define FREE_ADAPTERS                \
-  do {                                 \
-    hcfree (hm_adapters_adl);          \
-    hcfree (hm_adapters_nvapi);        \
-    hcfree (hm_adapters_nvml);         \
-    hcfree (hm_adapters_sysfs_amdgpu); \
-    hcfree (hm_adapters_sysfs_cpu);    \
-    hcfree (hm_adapters_iokit);        \
-  } while (0)
-
-  if (backend_ctx->need_nvml == true)
-  {
-    hwmon_ctx->hm_nvml = (NVML_PTR *) hcmalloc (sizeof (NVML_PTR));
-
-    if (nvml_init (hashcat_ctx) == -1)
-    {
-      hcfree (hwmon_ctx->hm_nvml);
-
-      hwmon_ctx->hm_nvml = NULL;
-    }
-  }
-
-  if ((backend_ctx->need_nvapi == true) && (hwmon_ctx->hm_nvml)) // nvapi can't work alone, we need nvml, too
-  {
-    hwmon_ctx->hm_nvapi = (NVAPI_PTR *) hcmalloc (sizeof (NVAPI_PTR));
-
-    if (nvapi_init (hashcat_ctx) == -1)
-    {
-      hcfree (hwmon_ctx->hm_nvapi);
-
-      hwmon_ctx->hm_nvapi = NULL;
-    }
-  }
-
-  if (backend_ctx->need_adl == true)
-  {
-    hwmon_ctx->hm_adl = (ADL_PTR *) hcmalloc (sizeof (ADL_PTR));
-
-    if (adl_init (hashcat_ctx) == -1)
-    {
-      hcfree (hwmon_ctx->hm_adl);
-
-      hwmon_ctx->hm_adl = NULL;
-    }
-  }
-
-  if (backend_ctx->need_sysfs_amdgpu == true)
-  {
-    hwmon_ctx->hm_sysfs_amdgpu = (SYSFS_AMDGPU_PTR *) hcmalloc (sizeof (SYSFS_AMDGPU_PTR));
-
-    if (sysfs_amdgpu_init (hashcat_ctx) == false)
-    {
-      hcfree (hwmon_ctx->hm_sysfs_amdgpu);
-
-      hwmon_ctx->hm_sysfs_amdgpu = NULL;
-    }
-
-    // also if there's ADL, we don't need sysfs_amdgpu
-
-    if (hwmon_ctx->hm_adl)
-    {
-      hcfree (hwmon_ctx->hm_sysfs_amdgpu);
-
-      hwmon_ctx->hm_sysfs_amdgpu = NULL;
-    }
-  }
-
-  if (backend_ctx->need_sysfs_cpu == true)
-  {
-    hwmon_ctx->hm_sysfs_cpu = (SYSFS_CPU_PTR *) hcmalloc (sizeof (SYSFS_CPU_PTR));
-
-    if (sysfs_cpu_init (hashcat_ctx) == false)
-    {
-      hcfree (hwmon_ctx->hm_sysfs_cpu);
-
-      hwmon_ctx->hm_sysfs_cpu = NULL;
-    }
-  }
-
-  #if defined(__APPLE__)
-  if (backend_ctx->need_iokit == true)
-  {
-    hwmon_ctx->hm_iokit = (IOKIT_PTR *) hcmalloc (sizeof (IOKIT_PTR));
-
-    if (iokit_init (hashcat_ctx) == false)
-    {
-      hcfree (hwmon_ctx->hm_iokit);
-
-      hwmon_ctx->hm_iokit = NULL;
-    }
-  }
-  #endif
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+  hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
 
   if (hwmon_ctx->hm_nvml)
   {
@@ -1485,6 +1353,12 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
       hcfree (nvmlGPUHandle);
     }
   }
+}
+
+static void hwmon_ctx_init_nvapi (hashcat_ctx_t *hashcat_ctx, hm_attrs_t *hm_adapters_nvapi, int backend_devices_cnt)
+{
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+  hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
 
   if (hwmon_ctx->hm_nvapi)
   {
@@ -1558,6 +1432,12 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
       hcfree (nvGPUHandle);
     }
   }
+}
+
+static int hwmon_ctx_init_adl (hashcat_ctx_t *hashcat_ctx, hm_attrs_t *hm_adapters_adl, int backend_devices_cnt)
+{
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+  hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
 
   if (hwmon_ctx->hm_adl)
   {
@@ -1567,23 +1447,13 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
       int tmp_in;
 
-      if (get_adapters_num_adl (hashcat_ctx, &tmp_in) == -1)
-      {
-        FREE_ADAPTERS;
-
-        return -1;
-      }
+      if (get_adapters_num_adl (hashcat_ctx, &tmp_in) == -1) return -1;
 
       // adapter info
 
       LPAdapterInfo lpAdapterInfo = (LPAdapterInfo) hccalloc (tmp_in, sizeof (AdapterInfo));
 
-      if (hm_ADL_Adapter_AdapterInfo_Get (hashcat_ctx, lpAdapterInfo, tmp_in * sizeof (AdapterInfo)) == -1)
-      {
-        FREE_ADAPTERS;
-
-        return -1;
-      }
+      if (hm_ADL_Adapter_AdapterInfo_Get (hashcat_ctx, lpAdapterInfo, tmp_in * sizeof (AdapterInfo)) == -1) return -1;
 
       for (int backend_devices_idx = 0; backend_devices_idx < backend_devices_cnt; backend_devices_idx++)
       {
@@ -1639,107 +1509,260 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
     }
   }
 
+  return 0;
+}
+
+static void hwmon_ctx_init_sysfs_amdgpu_iokit (hashcat_ctx_t *hashcat_ctx, hm_attrs_t *hm_adapters_sysfs_amdgpu, hm_attrs_t *hm_adapters_iokit, int backend_devices_cnt)
+{
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+  hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
+
   if (hwmon_ctx->hm_sysfs_amdgpu || hwmon_ctx->hm_iokit)
   {
-    if (true)
+    for (int backend_devices_idx = 0; backend_devices_idx < backend_devices_cnt; backend_devices_idx++)
     {
-      for (int backend_devices_idx = 0; backend_devices_idx < backend_devices_cnt; backend_devices_idx++)
+      hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
+
+      if (device_param->skipped == true) continue;
+
+      if (device_param->is_cuda == true)
       {
-        hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
+        // nothing to do
+      }
 
-        if (device_param->skipped == true) continue;
+      #if defined (__APPLE__)
+      if (device_param->is_metal == true)
+      {
+        const u32 device_id = device_param->device_id;
 
-        if (device_param->is_cuda == true)
+        if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (hwmon_ctx->hm_iokit))
         {
-          // nothing to do
+          hm_adapters_iokit[device_id].buslanes_get_supported    = false;
+          hm_adapters_iokit[device_id].corespeed_get_supported   = false;
+          hm_adapters_iokit[device_id].fanspeed_get_supported    = true;
+          hm_adapters_iokit[device_id].fanpolicy_get_supported   = false;
+          hm_adapters_iokit[device_id].memoryspeed_get_supported = false;
+          hm_adapters_iokit[device_id].temperature_get_supported = true;
+          hm_adapters_iokit[device_id].utilization_get_supported = true;
+        }
+      }
+      #endif
+
+      if ((device_param->is_opencl == true) || (device_param->is_hip == true))
+      {
+        const u32 device_id = device_param->device_id;
+
+        if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (hwmon_ctx->hm_iokit))
+        {
+          hm_adapters_iokit[device_id].buslanes_get_supported    = false;
+          hm_adapters_iokit[device_id].corespeed_get_supported   = false;
+          hm_adapters_iokit[device_id].fanspeed_get_supported    = true;
+          hm_adapters_iokit[device_id].fanpolicy_get_supported   = false;
+          hm_adapters_iokit[device_id].memoryspeed_get_supported = false;
+          hm_adapters_iokit[device_id].temperature_get_supported = true;
+          hm_adapters_iokit[device_id].utilization_get_supported = true;
         }
 
-        #if defined (__APPLE__)
-        if (device_param->is_metal == true)
+        if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
+
+        if (hwmon_ctx->hm_sysfs_amdgpu)
         {
-          const u32 device_id = device_param->device_id;
-
-          if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (hwmon_ctx->hm_iokit))
-          {
-            hm_adapters_iokit[device_id].buslanes_get_supported    = false;
-            hm_adapters_iokit[device_id].corespeed_get_supported   = false;
-            hm_adapters_iokit[device_id].fanspeed_get_supported    = true;
-            hm_adapters_iokit[device_id].fanpolicy_get_supported   = false;
-            hm_adapters_iokit[device_id].memoryspeed_get_supported = false;
-            hm_adapters_iokit[device_id].temperature_get_supported = true;
-            hm_adapters_iokit[device_id].utilization_get_supported = true;
-          }
-        }
-        #endif
-
-        if ((device_param->is_opencl == true) || (device_param->is_hip == true))
-        {
-          const u32 device_id = device_param->device_id;
-
-          if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (hwmon_ctx->hm_iokit))
-          {
-            hm_adapters_iokit[device_id].buslanes_get_supported    = false;
-            hm_adapters_iokit[device_id].corespeed_get_supported   = false;
-            hm_adapters_iokit[device_id].fanspeed_get_supported    = true;
-            hm_adapters_iokit[device_id].fanpolicy_get_supported   = false;
-            hm_adapters_iokit[device_id].memoryspeed_get_supported = false;
-            hm_adapters_iokit[device_id].temperature_get_supported = true;
-            hm_adapters_iokit[device_id].utilization_get_supported = true;
-          }
-
-          if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
-
-          if (hwmon_ctx->hm_sysfs_amdgpu)
-          {
-            hm_adapters_sysfs_amdgpu[device_id].buslanes_get_supported    = true;
-            hm_adapters_sysfs_amdgpu[device_id].corespeed_get_supported   = true;
-            hm_adapters_sysfs_amdgpu[device_id].fanspeed_get_supported    = true;
-            hm_adapters_sysfs_amdgpu[device_id].fanpolicy_get_supported   = true;
-            hm_adapters_sysfs_amdgpu[device_id].memoryspeed_get_supported = true;
-            hm_adapters_sysfs_amdgpu[device_id].temperature_get_supported = true;
-            hm_adapters_sysfs_amdgpu[device_id].utilization_get_supported = true;
-            hm_adapters_sysfs_amdgpu[device_id].memoryused_get_supported  = true;
-          }
+          hm_adapters_sysfs_amdgpu[device_id].buslanes_get_supported    = true;
+          hm_adapters_sysfs_amdgpu[device_id].corespeed_get_supported   = true;
+          hm_adapters_sysfs_amdgpu[device_id].fanspeed_get_supported    = true;
+          hm_adapters_sysfs_amdgpu[device_id].fanpolicy_get_supported   = true;
+          hm_adapters_sysfs_amdgpu[device_id].memoryspeed_get_supported = true;
+          hm_adapters_sysfs_amdgpu[device_id].temperature_get_supported = true;
+          hm_adapters_sysfs_amdgpu[device_id].utilization_get_supported = true;
+          hm_adapters_sysfs_amdgpu[device_id].memoryused_get_supported  = true;
         }
       }
     }
   }
+}
+
+static void hwmon_ctx_init_sysfs_cpu (hashcat_ctx_t *hashcat_ctx, hm_attrs_t *hm_adapters_sysfs_cpu, int backend_devices_cnt)
+{
+  backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
+  hwmon_ctx_t   *hwmon_ctx   = hashcat_ctx->hwmon_ctx;
 
   if (hwmon_ctx->hm_sysfs_cpu)
   {
-    if (true)
+    for (int backend_devices_idx = 0; backend_devices_idx < backend_devices_cnt; backend_devices_idx++)
     {
-      for (int backend_devices_idx = 0; backend_devices_idx < backend_devices_cnt; backend_devices_idx++)
+      hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
+
+      if (device_param->skipped == true) continue;
+
+      if (device_param->is_cuda == true)
       {
-        hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
-
-        if (device_param->skipped == true) continue;
-
-        if (device_param->is_cuda == true)
-        {
           // nothing to do
-        }
+      }
 
-        if ((device_param->is_opencl == true) || (device_param->is_hip == true))
+      if ((device_param->is_opencl == true) || (device_param->is_hip == true))
+      {
+        const u32 device_id = device_param->device_id;
+
+        if ((device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) == 0) continue;
+
+        if (hwmon_ctx->hm_sysfs_cpu)
         {
-          const u32 device_id = device_param->device_id;
-
-          if ((device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) == 0) continue;
-
-          if (hwmon_ctx->hm_sysfs_cpu)
-          {
-            hm_adapters_sysfs_cpu[device_id].buslanes_get_supported    = false;
-            hm_adapters_sysfs_cpu[device_id].corespeed_get_supported   = false;
-            hm_adapters_sysfs_cpu[device_id].fanspeed_get_supported    = false;
-            hm_adapters_sysfs_cpu[device_id].fanpolicy_get_supported   = false;
-            hm_adapters_sysfs_cpu[device_id].memoryspeed_get_supported = false;
-            hm_adapters_sysfs_cpu[device_id].temperature_get_supported = true;
-            hm_adapters_sysfs_cpu[device_id].utilization_get_supported = true;
-          }
+          hm_adapters_sysfs_cpu[device_id].buslanes_get_supported    = false;
+          hm_adapters_sysfs_cpu[device_id].corespeed_get_supported   = false;
+          hm_adapters_sysfs_cpu[device_id].fanspeed_get_supported    = false;
+          hm_adapters_sysfs_cpu[device_id].fanpolicy_get_supported   = false;
+          hm_adapters_sysfs_cpu[device_id].memoryspeed_get_supported = false;
+          hm_adapters_sysfs_cpu[device_id].temperature_get_supported = true;
+          hm_adapters_sysfs_cpu[device_id].utilization_get_supported = true;
         }
       }
     }
   }
+}
+
+int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
+{
+  bridge_ctx_t   *bridge_ctx   = hashcat_ctx->bridge_ctx;
+  hwmon_ctx_t    *hwmon_ctx    = hashcat_ctx->hwmon_ctx;
+  backend_ctx_t  *backend_ctx  = hashcat_ctx->backend_ctx;
+  user_options_t *user_options = hashcat_ctx->user_options;
+
+  hwmon_ctx->enabled = false;
+
+  int backend_devices_cnt = backend_ctx->backend_devices_cnt;
+
+  if (bridge_ctx->enabled == true) backend_devices_cnt = 1;
+
+  //#if !defined (WITH_HWMON)
+  //return 0;
+  //#endif // WITH_HWMON
+
+  if (user_options->usage          > 0)     return 0;
+  //if (user_options->backend_info   > 0)     return 0;
+
+  if (user_options->hash_info     == true)  return 0;
+  if (user_options->keyspace      == true)  return 0;
+  if (user_options->left          == true)  return 0;
+  if (user_options->show          == true)  return 0;
+  if (user_options->stdout_flag   == true)  return 0;
+  if (user_options->version       == true)  return 0;
+  if (user_options->identify      == true)  return 0;
+  //we need hwmon support to get free memory per device support
+  //its a joke, but there's no way around
+  //if (user_options->hwmon         == false) return 0;
+
+  hwmon_ctx->hm_device = (hm_attrs_t *) hccalloc (DEVICES_MAX, sizeof (hm_attrs_t));
+
+  /**
+   * Initialize shared libraries
+   */
+
+  hm_attrs_t *hm_adapters_adl           = (hm_attrs_t *) hccalloc (DEVICES_MAX, sizeof (hm_attrs_t));
+  hm_attrs_t *hm_adapters_nvapi         = (hm_attrs_t *) hccalloc (DEVICES_MAX, sizeof (hm_attrs_t));
+  hm_attrs_t *hm_adapters_nvml          = (hm_attrs_t *) hccalloc (DEVICES_MAX, sizeof (hm_attrs_t));
+  hm_attrs_t *hm_adapters_sysfs_amdgpu  = (hm_attrs_t *) hccalloc (DEVICES_MAX, sizeof (hm_attrs_t));
+  hm_attrs_t *hm_adapters_sysfs_cpu     = (hm_attrs_t *) hccalloc (DEVICES_MAX, sizeof (hm_attrs_t));
+  hm_attrs_t *hm_adapters_iokit         = (hm_attrs_t *) hccalloc (DEVICES_MAX, sizeof (hm_attrs_t));
+
+  if (backend_ctx->need_nvml == true)
+  {
+    hwmon_ctx->hm_nvml = (NVML_PTR *) hcmalloc (sizeof (NVML_PTR));
+
+    if (nvml_init (hashcat_ctx) == -1)
+    {
+      hcfree (hwmon_ctx->hm_nvml);
+
+      hwmon_ctx->hm_nvml = NULL;
+    }
+  }
+
+  if ((backend_ctx->need_nvapi == true) && (hwmon_ctx->hm_nvml)) // nvapi can't work alone, we need nvml, too
+  {
+    hwmon_ctx->hm_nvapi = (NVAPI_PTR *) hcmalloc (sizeof (NVAPI_PTR));
+
+    if (nvapi_init (hashcat_ctx) == -1)
+    {
+      hcfree (hwmon_ctx->hm_nvapi);
+
+      hwmon_ctx->hm_nvapi = NULL;
+    }
+  }
+
+  if (backend_ctx->need_adl == true)
+  {
+    hwmon_ctx->hm_adl = (ADL_PTR *) hcmalloc (sizeof (ADL_PTR));
+
+    if (adl_init (hashcat_ctx) == -1)
+    {
+      hcfree (hwmon_ctx->hm_adl);
+
+      hwmon_ctx->hm_adl = NULL;
+    }
+  }
+
+  if (backend_ctx->need_sysfs_amdgpu == true)
+  {
+    hwmon_ctx->hm_sysfs_amdgpu = (SYSFS_AMDGPU_PTR *) hcmalloc (sizeof (SYSFS_AMDGPU_PTR));
+
+    if (sysfs_amdgpu_init (hashcat_ctx) == false)
+    {
+      hcfree (hwmon_ctx->hm_sysfs_amdgpu);
+
+      hwmon_ctx->hm_sysfs_amdgpu = NULL;
+    }
+  }
+
+  if (backend_ctx->need_sysfs_cpu == true)
+  {
+    hwmon_ctx->hm_sysfs_cpu = (SYSFS_CPU_PTR *) hcmalloc (sizeof (SYSFS_CPU_PTR));
+
+    if (sysfs_cpu_init (hashcat_ctx) == false)
+    {
+      hcfree (hwmon_ctx->hm_sysfs_cpu);
+
+      hwmon_ctx->hm_sysfs_cpu = NULL;
+    }
+  }
+
+  #if defined(__APPLE__)
+  if (backend_ctx->need_iokit == true)
+  {
+    hwmon_ctx->hm_iokit = (IOKIT_PTR *) hcmalloc (sizeof (IOKIT_PTR));
+
+    if (iokit_init (hashcat_ctx) == false)
+    {
+      hcfree (hwmon_ctx->hm_iokit);
+
+      hwmon_ctx->hm_iokit = NULL;
+    }
+  }
+  #endif
+
+  hwmon_ctx_init_nvml  (hashcat_ctx, hm_adapters_nvml,  backend_devices_cnt);
+
+  hwmon_ctx_init_nvapi (hashcat_ctx, hm_adapters_nvapi, backend_devices_cnt);
+
+  // if ADL init fails, disable ADL
+
+  if (hwmon_ctx_init_adl (hashcat_ctx, hm_adapters_adl, backend_devices_cnt) == -1)
+  {
+    hcfree (hwmon_ctx->hm_adl);
+
+    hwmon_ctx->hm_adl = NULL;
+  }
+
+  // if there's ADL, we don't need sysfs_amdgpu
+
+  if (hwmon_ctx->hm_adl)
+  {
+    hcfree (hwmon_ctx->hm_sysfs_amdgpu);
+
+    hwmon_ctx->hm_sysfs_amdgpu = NULL;
+  }
+
+  hwmon_ctx_init_sysfs_amdgpu_iokit (hashcat_ctx, hm_adapters_sysfs_amdgpu, hm_adapters_iokit, backend_devices_cnt);
+
+  hwmon_ctx_init_sysfs_cpu (hashcat_ctx, hm_adapters_sysfs_cpu, backend_devices_cnt);
 
   #if defined(__APPLE__)
   if (backend_ctx->need_iokit == true)
@@ -1757,7 +1780,12 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
   if (hwmon_ctx->hm_adl == NULL && hwmon_ctx->hm_nvml == NULL && hwmon_ctx->hm_sysfs_amdgpu == NULL && hwmon_ctx->hm_sysfs_cpu == NULL && hwmon_ctx->hm_iokit == NULL)
   {
-    FREE_ADAPTERS;
+    hcfree (hm_adapters_adl);
+    hcfree (hm_adapters_nvapi);
+    hcfree (hm_adapters_nvml);
+    hcfree (hm_adapters_sysfs_amdgpu);
+    hcfree (hm_adapters_sysfs_cpu);
+    hcfree (hm_adapters_iokit);
 
     return 0;
   }
@@ -1992,7 +2020,12 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
     hm_get_memoryused_with_devices_idx         (hashcat_ctx, backend_devices_idx);
   }
 
-  FREE_ADAPTERS;
+  hcfree (hm_adapters_adl);
+  hcfree (hm_adapters_nvapi);
+  hcfree (hm_adapters_nvml);
+  hcfree (hm_adapters_sysfs_amdgpu);
+  hcfree (hm_adapters_sysfs_cpu);
+  hcfree (hm_adapters_iokit);
 
   return 0;
 }

From f848163b0ac594f43695a716167d75a798d7eddc Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Sun, 29 Jun 2025 23:08:43 +0200
Subject: [PATCH 51/83] Fixed race condition in selftest_init on OpenCL with
 non-blocking write

---
 docs/changes.txt |  1 +
 src/selftest.c   | 27 +++++++++++++++++++--------
 2 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/docs/changes.txt b/docs/changes.txt
index f4dc61531..2d8b5007a 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -92,6 +92,7 @@
 - Fixed bug in grep out-of-memory workaround on Unit Test
 - Fixed bug in input_tokenizer when TOKEN_ATTR_FIXED_LENGTH is used and refactor modules
 - Fixed bug in --stdout that caused certain rules to malfunction
+- Fixed race condition in selftest_init on OpenCL with non-blocking write
 - Fixed build failed for 10700 optimized with Apple Metal
 - Fixed build failed for 13772 and 13773 with Apple Metal
 - Fixed build failed for 18400 with Apple Metal
diff --git a/src/selftest.c b/src/selftest.c
index aea6f3128..d8b105d2e 100644
--- a/src/selftest.c
+++ b/src/selftest.c
@@ -78,6 +78,8 @@ static int selftest_init (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_
     tmp.pw_len = (u32) tmp_len;
   }
 
+  cl_event opencl_event;
+
   pw_t pw;
   pw_t comb;
   bf_t bf;
@@ -118,7 +120,7 @@ static int selftest_init (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_
 
     if (device_param->is_opencl == true)
     {
-      if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_FALSE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL) == -1) return -1;
+      if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_FALSE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, &opencl_event) == -1) return -1;
     }
   }
   else
@@ -163,7 +165,7 @@ static int selftest_init (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_
 
         if (device_param->is_opencl == true)
         {
-          if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_FALSE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL) == -1) return -1;
+          if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_FALSE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, &opencl_event) == -1) return -1;
         }
       }
       else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI)
@@ -239,9 +241,9 @@ static int selftest_init (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_
 
         if (device_param->is_opencl == true)
         {
-          if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_FALSE, 0, 1 * sizeof (pw_t), &comb, 0, NULL, NULL) == -1) return -1;
+          if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_FALSE, 0, 1 * sizeof (pw_t), &comb, 0, NULL, &opencl_event) == -1) return -1;
 
-          if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_FALSE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL) == -1) return -1;
+          if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_FALSE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, &opencl_event) == -1) return -1;
         }
       }
       else if (user_options_extra->attack_kern == ATTACK_KERN_BF)
@@ -284,7 +286,7 @@ static int selftest_init (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_
 
           if (device_param->is_opencl == true)
           {
-            if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_FALSE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL) == -1) return -1;
+            if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_FALSE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, &opencl_event) == -1) return -1;
           }
         }
         else
@@ -345,7 +347,7 @@ static int selftest_init (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_
 
           if (device_param->is_opencl == true)
           {
-            if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bfs_c, CL_FALSE, 0, 1 * sizeof (bf_t), &bf, 0, NULL, NULL) == -1) return -1;
+            if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bfs_c, CL_FALSE, 0, 1 * sizeof (bf_t), &bf, 0, NULL, &opencl_event) == -1) return -1;
           }
 
           memset (&pw, 0, sizeof (pw));
@@ -451,7 +453,7 @@ static int selftest_init (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_
 
           if (device_param->is_opencl == true)
           {
-            if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_FALSE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL) == -1) return -1;
+            if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_FALSE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, &opencl_event) == -1) return -1;
           }
 
           *highest_pw_len = pw.pw_len;
@@ -489,11 +491,20 @@ static int selftest_init (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_
 
       if (device_param->is_opencl == true)
       {
-        if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_FALSE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, NULL) == -1) return -1;
+        if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_FALSE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, &opencl_event) == -1) return -1;
       }
     }
   }
 
+  // prevent race condition on OpenCL with non-blocking write
+
+  if (device_param->is_opencl == true)
+  {
+    if (hc_clWaitForEvents (hashcat_ctx, 1, &opencl_event) == -1) return -1;
+
+    if (hc_clReleaseEvent (hashcat_ctx, opencl_event) == -1) return -1;
+  }
+
   return 0;
 }
 

From f8df94f4571d557e50ca3a25e5e62111df18dcf4 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Mon, 30 Jun 2025 11:26:05 +0200
Subject: [PATCH 52/83] Switched all async and non-blocking calls to
 synchronous and blocking ones. Kept the original async bindings intact. This
 avoids race conditions like the one fixed in the previous commit, with no
 performance impact. Fixed a typedef issue for clEnqueueReadBuffer(). Updated
 Python/hcshared.py with missing entry for new salt_dimy attribute in salt_t
 struct. Fixed a bug in the autotuner when determining the starting value for
 kernel loops, in cases where the iteration count is N-1 and not a multiple of
 1024. Updated additional plugins to use OPTI_TYPE_REGISTER_LIMIT.

---
 Python/hcshared.py         |   3 +-
 include/ext_OpenCL.h       |   2 +-
 include/ext_cuda.h         |  27 +++-
 include/ext_hip.h          |  15 ++
 src/autotune.c             |   8 +-
 src/backend.c              | 280 ++++++++++++++++++-------------------
 src/ext_cuda.c             | 145 ++++++++++++++++++-
 src/ext_hip.c              | 143 ++++++++++++++++++-
 src/hashes.c               |  14 +-
 src/modules/module_01300.c |   1 +
 src/modules/module_07100.c |  11 +-
 src/modules/module_08200.c |   1 +
 src/selftest.c             |  90 ++++++------
 13 files changed, 523 insertions(+), 217 deletions(-)

diff --git a/Python/hcshared.py b/Python/hcshared.py
index 2fcf56c70..ad4390083 100644
--- a/Python/hcshared.py
+++ b/Python/hcshared.py
@@ -6,13 +6,14 @@ import sys
 
 def extract_salts(salts_buf) -> list:
   salts=[]
-  for salt_buf, salt_buf_pc, salt_len, salt_len_pc, salt_iter, salt_iter2, salt_sign, salt_repeats, orig_pos, digests_cnt, digests_done, digests_offset, scrypt_N, scrypt_r, scrypt_p in struct.iter_unpack("256s 256s I I I I 8s I I I I I I I I", salts_buf):
+  for salt_buf, salt_buf_pc, salt_len, salt_len_pc, salt_iter, salt_iter2, salt_dimy, salt_sign, salt_repeats, orig_pos, digests_cnt, digests_done, digests_offset, scrypt_N, scrypt_r, scrypt_p in struct.iter_unpack("256s 256s I I I I I 8s I I I I I I I I", salts_buf):
     salt_buf = salt_buf[0:salt_len]
     salt_buf_pc = salt_buf_pc[0:salt_len_pc]
     salts.append({ "salt_buf":      salt_buf,     \
                    "salt_buf_pc":   salt_buf_pc,  \
                    "salt_iter":     salt_iter,    \
                    "salt_iter2":    salt_iter2,   \
+                   "salt_dimy":     salt_dimy,    \
                    "salt_sign":     salt_sign,    \
                    "salt_repeats":  salt_repeats, \
                    "orig_pos":      orig_pos,     \
diff --git a/include/ext_OpenCL.h b/include/ext_OpenCL.h
index d49f82c79..7044ac667 100644
--- a/include/ext_OpenCL.h
+++ b/include/ext_OpenCL.h
@@ -51,7 +51,7 @@ typedef cl_int           (CL_API_CALL *OCL_CLENQUEUEFILLBUFFER)       (cl_comman
 typedef cl_int           (CL_API_CALL *OCL_CLENQUEUECOPYBUFFER)       (cl_command_queue, cl_mem, cl_mem, size_t, size_t, size_t, cl_uint, const cl_event *, cl_event *);
 typedef void *           (CL_API_CALL *OCL_CLENQUEUEMAPBUFFER)        (cl_command_queue, cl_mem, cl_bool, cl_map_flags, size_t, size_t, cl_uint, const cl_event *, cl_event *, cl_int *);
 typedef cl_int           (CL_API_CALL *OCL_CLENQUEUENDRANGEKERNEL)    (cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *);
-typedef cl_int           (CL_API_CALL *OCL_CLENQUEUEREADBUFFER)       (cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *);
+typedef cl_int           (CL_API_CALL *OCL_CLENQUEUEREADBUFFER)       (cl_command_queue, cl_mem, cl_bool, size_t, size_t, void *, cl_uint, const cl_event *, cl_event *);
 typedef cl_int           (CL_API_CALL *OCL_CLENQUEUEUNMAPMEMOBJECT)   (cl_command_queue, cl_mem, void *, cl_uint, const cl_event *, cl_event *);
 typedef cl_int           (CL_API_CALL *OCL_CLENQUEUEWRITEBUFFER)      (cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *);
 typedef cl_int           (CL_API_CALL *OCL_CLFINISH)                  (cl_command_queue);
diff --git a/include/ext_cuda.h b/include/ext_cuda.h
index 44ae01cd6..a80af5367 100644
--- a/include/ext_cuda.h
+++ b/include/ext_cuda.h
@@ -1154,14 +1154,19 @@ typedef CUresult (CUDA_API_CALL *CUDA_CUINIT)                   (unsigned int);
 typedef CUresult (CUDA_API_CALL *CUDA_CULAUNCHKERNEL)           (CUfunction, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, CUstream, void **, void **);
 typedef CUresult (CUDA_API_CALL *CUDA_CUMEMALLOC)               (CUdeviceptr *, size_t);
 typedef CUresult (CUDA_API_CALL *CUDA_CUMEMALLOCHOST)           (void **, size_t);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMEMCPYDTOD)             (CUdeviceptr, CUdeviceptr, size_t);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMEMCPYDTOH)             (void *, CUdeviceptr, size_t);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMEMCPYHTOD)             (CUdeviceptr, const void *, size_t);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMEMSETD32)              (CUdeviceptr, unsigned int, size_t);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMEMSETD8)               (CUdeviceptr, unsigned char, size_t);
 typedef CUresult (CUDA_API_CALL *CUDA_CUMEMCPYDTODASYNC)        (CUdeviceptr, CUdeviceptr, size_t, CUstream);
 typedef CUresult (CUDA_API_CALL *CUDA_CUMEMCPYDTOHASYNC)        (void *, CUdeviceptr, size_t, CUstream);
 typedef CUresult (CUDA_API_CALL *CUDA_CUMEMCPYHTODASYNC)        (CUdeviceptr, const void *, size_t, CUstream);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMEMSETD32ASYNC)         (CUdeviceptr, unsigned int, size_t, CUstream);
+typedef CUresult (CUDA_API_CALL *CUDA_CUMEMSETD8ASYNC)          (CUdeviceptr, unsigned char, size_t, CUstream);
 typedef CUresult (CUDA_API_CALL *CUDA_CUMEMFREE)                (CUdeviceptr);
 typedef CUresult (CUDA_API_CALL *CUDA_CUMEMFREEHOST)            (void *);
 typedef CUresult (CUDA_API_CALL *CUDA_CUMEMGETINFO)             (size_t *, size_t *);
-typedef CUresult (CUDA_API_CALL *CUDA_CUMEMSETD32ASYNC)         (CUdeviceptr, unsigned int, size_t, CUstream);
-typedef CUresult (CUDA_API_CALL *CUDA_CUMEMSETD8ASYNC)          (CUdeviceptr, unsigned char, size_t, CUstream);
 typedef CUresult (CUDA_API_CALL *CUDA_CUMODULEGETFUNCTION)      (CUfunction *, CUmodule, const char *);
 typedef CUresult (CUDA_API_CALL *CUDA_CUMODULEGETGLOBAL)        (CUdeviceptr *, size_t *, CUmodule, const char *);
 typedef CUresult (CUDA_API_CALL *CUDA_CUMODULELOAD)             (CUmodule *, const char *);
@@ -1217,14 +1222,19 @@ typedef struct hc_cuda_lib
   CUDA_CULAUNCHKERNEL           cuLaunchKernel;
   CUDA_CUMEMALLOC               cuMemAlloc;
   CUDA_CUMEMALLOCHOST           cuMemAllocHost;
+  CUDA_CUMEMCPYDTOD             cuMemcpyDtoD;
+  CUDA_CUMEMCPYDTOH             cuMemcpyDtoH;
+  CUDA_CUMEMCPYHTOD             cuMemcpyHtoD;
+  CUDA_CUMEMSETD32              cuMemsetD32;
+  CUDA_CUMEMSETD8               cuMemsetD8;
   CUDA_CUMEMCPYDTODASYNC        cuMemcpyDtoDAsync;
   CUDA_CUMEMCPYDTOHASYNC        cuMemcpyDtoHAsync;
   CUDA_CUMEMCPYHTODASYNC        cuMemcpyHtoDAsync;
+  CUDA_CUMEMSETD32ASYNC         cuMemsetD32Async;
+  CUDA_CUMEMSETD8ASYNC          cuMemsetD8Async;
   CUDA_CUMEMFREE                cuMemFree;
   CUDA_CUMEMFREEHOST            cuMemFreeHost;
   CUDA_CUMEMGETINFO             cuMemGetInfo;
-  CUDA_CUMEMSETD32ASYNC         cuMemsetD32Async;
-  CUDA_CUMEMSETD8ASYNC          cuMemsetD8Async;
   CUDA_CUMODULEGETFUNCTION      cuModuleGetFunction;
   CUDA_CUMODULEGETGLOBAL        cuModuleGetGlobal;
   CUDA_CUMODULELOAD             cuModuleLoad;
@@ -1272,13 +1282,18 @@ int hc_cuFuncSetAttribute      (void *hashcat_ctx, CUfunction hfunc, CUfunction_
 int hc_cuInit                  (void *hashcat_ctx, unsigned int Flags);
 int hc_cuLaunchKernel          (void *hashcat_ctx, CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra);
 int hc_cuMemAlloc              (void *hashcat_ctx, CUdeviceptr *dptr, size_t bytesize);
+int hc_cuMemcpyDtoD            (void *hashcat_ctx, CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount);
+int hc_cuMemcpyDtoH            (void *hashcat_ctx, void *dstHost, CUdeviceptr srcDevice, size_t ByteCount);
+int hc_cuMemcpyHtoD            (void *hashcat_ctx, CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount);
+int hc_cuMemsetD32             (void *hashcat_ctx, CUdeviceptr dstDevice, unsigned int ui, size_t N);
+int hc_cuMemsetD8              (void *hashcat_ctx, CUdeviceptr dstDevice, unsigned char uc, size_t N);
 int hc_cuMemcpyDtoDAsync       (void *hashcat_ctx, CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
 int hc_cuMemcpyDtoHAsync       (void *hashcat_ctx, void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
 int hc_cuMemcpyHtoDAsync       (void *hashcat_ctx, CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream hStream);
-int hc_cuMemFree               (void *hashcat_ctx, CUdeviceptr dptr);
-int hc_cuMemGetInfo            (void *hashcat_ctx, size_t *free, size_t *total);
 int hc_cuMemsetD32Async        (void *hashcat_ctx, CUdeviceptr dstDevice, unsigned int ui, size_t N, CUstream hStream);
 int hc_cuMemsetD8Async         (void *hashcat_ctx, CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream);
+int hc_cuMemFree               (void *hashcat_ctx, CUdeviceptr dptr);
+int hc_cuMemGetInfo            (void *hashcat_ctx, size_t *free, size_t *total);
 int hc_cuModuleGetFunction     (void *hashcat_ctx, CUfunction *hfunc, CUmodule hmod, const char *name);
 int hc_cuModuleGetGlobal       (void *hashcat_ctx, CUdeviceptr *dptr, size_t *bytes, CUmodule hmod, const char *name);
 int hc_cuModuleLoadDataEx      (void *hashcat_ctx, CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
diff --git a/include/ext_hip.h b/include/ext_hip.h
index 4cc6fb797..d0f53d173 100644
--- a/include/ext_hip.h
+++ b/include/ext_hip.h
@@ -588,6 +588,11 @@ typedef hipError_t (HIP_API_CALL *HIP_HIPLAUNCHKERNEL)           (hipFunction_t,
 typedef hipError_t (HIP_API_CALL *HIP_HIPMEMALLOC)               (hipDeviceptr_t *, size_t);
 typedef hipError_t (HIP_API_CALL *HIP_HIPMEMFREE)                (hipDeviceptr_t);
 typedef hipError_t (HIP_API_CALL *HIP_HIPMEMGETINFO)             (size_t *, size_t *);
+typedef hipError_t (HIP_API_CALL *HIP_HIPMEMCPYDTOD)             (hipDeviceptr_t, hipDeviceptr_t, size_t);
+typedef hipError_t (HIP_API_CALL *HIP_HIPMEMCPYDTOH)             (void *, hipDeviceptr_t, size_t);
+typedef hipError_t (HIP_API_CALL *HIP_HIPMEMCPYHTOD)             (hipDeviceptr_t, const void *, size_t);
+typedef hipError_t (HIP_API_CALL *HIP_HIPMEMSETD32)              (hipDeviceptr_t, unsigned int, size_t);
+typedef hipError_t (HIP_API_CALL *HIP_HIPMEMSETD8)               (hipDeviceptr_t, unsigned char, size_t);
 typedef hipError_t (HIP_API_CALL *HIP_HIPMEMCPYDTODASYNC)        (hipDeviceptr_t, hipDeviceptr_t, size_t, hipStream_t);
 typedef hipError_t (HIP_API_CALL *HIP_HIPMEMCPYDTOHASYNC)        (void *, hipDeviceptr_t, size_t, hipStream_t);
 typedef hipError_t (HIP_API_CALL *HIP_HIPMEMCPYHTODASYNC)        (hipDeviceptr_t, const void *, size_t, hipStream_t);
@@ -633,6 +638,11 @@ typedef struct hc_hip_lib
   HIP_HIPMEMALLOC               hipMemAlloc;
   HIP_HIPMEMFREE                hipMemFree;
   HIP_HIPMEMGETINFO             hipMemGetInfo;
+  HIP_HIPMEMCPYDTOD             hipMemcpyDtoD;
+  HIP_HIPMEMCPYDTOH             hipMemcpyDtoH;
+  HIP_HIPMEMCPYHTOD             hipMemcpyHtoD;
+  HIP_HIPMEMSETD32              hipMemsetD32;
+  HIP_HIPMEMSETD8               hipMemsetD8;
   HIP_HIPMEMCPYDTODASYNC        hipMemcpyDtoDAsync;
   HIP_HIPMEMCPYDTOHASYNC        hipMemcpyDtoHAsync;
   HIP_HIPMEMCPYHTODASYNC        hipMemcpyHtoDAsync;
@@ -680,6 +690,11 @@ int hc_hipLaunchKernel         (void *hashcat_ctx, hipFunction_t f, unsigned int
 int hc_hipMemAlloc             (void *hashcat_ctx, hipDeviceptr_t *dptr, size_t bytesize);
 int hc_hipMemFree              (void *hashcat_ctx, hipDeviceptr_t dptr);
 int hc_hipMemGetInfo           (void *hashcat_ctx, size_t *free, size_t *total);
+int hc_hipMemcpyDtoD           (void *hashcat_ctx, hipDeviceptr_t dstDevice, hipDeviceptr_t srcDevice, size_t ByteCount);
+int hc_hipMemcpyDtoH           (void *hashcat_ctx, void *dstHost, hipDeviceptr_t srcDevice, size_t ByteCount);
+int hc_hipMemcpyHtoD           (void *hashcat_ctx, hipDeviceptr_t dstDevice, const void *srcHost, size_t ByteCount);
+int hc_hipMemsetD32            (void *hashcat_ctx, hipDeviceptr_t dstDevice, unsigned int ui, size_t N);
+int hc_hipMemsetD8             (void *hashcat_ctx, hipDeviceptr_t dstDevice, unsigned char uc, size_t N);
 int hc_hipMemcpyDtoDAsync      (void *hashcat_ctx, hipDeviceptr_t dstDevice, hipDeviceptr_t srcDevice, size_t ByteCount, hipStream_t hStream);
 int hc_hipMemcpyDtoHAsync      (void *hashcat_ctx, void *dstHost, hipDeviceptr_t srcDevice, size_t ByteCount, hipStream_t hStream);
 int hc_hipMemcpyHtoDAsync      (void *hashcat_ctx, hipDeviceptr_t dstDevice, const void *srcHost, size_t ByteCount, hipStream_t hStream);
diff --git a/src/autotune.c b/src/autotune.c
index 065c0a217..a599e65be 100644
--- a/src/autotune.c
+++ b/src/autotune.c
@@ -268,12 +268,12 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
           if (device_param->is_cuda == true)
           {
-            if (hc_cuMemcpyDtoDAsync (hashcat_ctx, device_param->cuda_d_rules_c, device_param->cuda_d_rules, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t), device_param->cuda_stream) == -1) return -1;
+            if (hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_rules_c, device_param->cuda_d_rules, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t)) == -1) return -1;
           }
 
           if (device_param->is_hip == true)
           {
-            if (hc_hipMemcpyDtoDAsync (hashcat_ctx, device_param->hip_d_rules_c, device_param->hip_d_rules, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t), device_param->hip_stream) == -1) return -1;
+            if (hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_rules_c, device_param->hip_d_rules, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t)) == -1) return -1;
           }
 
           #if defined (__APPLE__)
@@ -344,8 +344,8 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
         start = MIN (start, smallest_repeat_double (hashes->st_salts_buf->salt_iter));
         start = MIN (start, smallest_repeat_double (hashes->st_salts_buf->salt_iter + 1));
 
-        if ((hashes->st_salts_buf->salt_iter     % 125) == 0) start = MIN (start, 125);
-        if ((hashes->st_salts_buf->salt_iter + 1 % 125) == 0) start = MIN (start, 125);
+        if (((hashes->st_salts_buf->salt_iter + 0) % 125) == 0) start = MIN (start, 125);
+        if (((hashes->st_salts_buf->salt_iter + 1) % 125) == 0) start = MIN (start, 125);
 
         if ((start >= kernel_loops_min) && (start <= kernel_loops_max))
         {
diff --git a/src/backend.c b/src/backend.c
index 00cac2245..2d34caee1 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -985,7 +985,7 @@ int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, c
   {
     if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1) return -1;
 
-    if (hc_cuMemcpyDtoHAsync (hashcat_ctx, &pw_idx, device_param->cuda_d_pws_idx + (gidd * sizeof (pw_idx_t)), sizeof (pw_idx_t), device_param->cuda_stream) == -1) return -1;
+    if (hc_cuMemcpyDtoH (hashcat_ctx, &pw_idx, device_param->cuda_d_pws_idx + (gidd * sizeof (pw_idx_t)), sizeof (pw_idx_t)) == -1) return -1;
 
     if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1;
   }
@@ -994,7 +994,7 @@ int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, c
   {
     if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return -1;
 
-    if (hc_hipMemcpyDtoHAsync (hashcat_ctx, &pw_idx, device_param->hip_d_pws_idx + (gidd * sizeof (pw_idx_t)), sizeof (pw_idx_t), device_param->hip_stream) == -1) return -1;
+    if (hc_hipMemcpyDtoH (hashcat_ctx, &pw_idx, device_param->hip_d_pws_idx + (gidd * sizeof (pw_idx_t)), sizeof (pw_idx_t)) == -1) return -1;
 
     if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
   }
@@ -1020,14 +1020,14 @@ int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, c
   {
     if (device_param->is_cuda == true)
     {
-      if (hc_cuMemcpyDtoHAsync (hashcat_ctx, pw->i, device_param->cuda_d_pws_comp_buf + (off * sizeof (u32)), cnt * sizeof (u32), device_param->cuda_stream) == -1) return -1;
+      if (hc_cuMemcpyDtoH (hashcat_ctx, pw->i, device_param->cuda_d_pws_comp_buf + (off * sizeof (u32)), cnt * sizeof (u32)) == -1) return -1;
 
       if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1;
     }
 
     if (device_param->is_hip == true)
     {
-      if (hc_hipMemcpyDtoHAsync (hashcat_ctx, pw->i, device_param->hip_d_pws_comp_buf + (off * sizeof (u32)), cnt * sizeof (u32), device_param->hip_stream) == -1) return -1;
+      if (hc_hipMemcpyDtoH (hashcat_ctx, pw->i, device_param->hip_d_pws_comp_buf + (off * sizeof (u32)), cnt * sizeof (u32)) == -1) return -1;
 
       if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
     }
@@ -1072,7 +1072,7 @@ int copy_pws_idx (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, u
   {
     if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1) return -1;
 
-    if (hc_cuMemcpyDtoHAsync (hashcat_ctx, dest, device_param->cuda_d_pws_idx + (gidd * sizeof (pw_idx_t)), (cnt * sizeof (pw_idx_t)), device_param->cuda_stream) == -1) return -1;
+    if (hc_cuMemcpyDtoH (hashcat_ctx, dest, device_param->cuda_d_pws_idx + (gidd * sizeof (pw_idx_t)), (cnt * sizeof (pw_idx_t))) == -1) return -1;
 
     if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1;
 
@@ -1083,7 +1083,7 @@ int copy_pws_idx (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, u
   {
     if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return -1;
 
-    if (hc_hipMemcpyDtoHAsync (hashcat_ctx, dest, device_param->hip_d_pws_idx + (gidd * sizeof (pw_idx_t)), (cnt * sizeof (pw_idx_t)), device_param->hip_stream) == -1) return -1;
+    if (hc_hipMemcpyDtoH (hashcat_ctx, dest, device_param->hip_d_pws_idx + (gidd * sizeof (pw_idx_t)), (cnt * sizeof (pw_idx_t))) == -1) return -1;
 
     if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
 
@@ -1112,7 +1112,7 @@ int copy_pws_comp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
   {
     if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1) return -1;
 
-    if (hc_cuMemcpyDtoHAsync (hashcat_ctx, dest, device_param->cuda_d_pws_comp_buf + (off * sizeof (u32)), cnt * sizeof (u32), device_param->cuda_stream) == -1) return -1;
+    if (hc_cuMemcpyDtoH (hashcat_ctx, dest, device_param->cuda_d_pws_comp_buf + (off * sizeof (u32)), cnt * sizeof (u32)) == -1) return -1;
 
     if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1;
 
@@ -1123,7 +1123,7 @@ int copy_pws_comp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
   {
     if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return -1;
 
-    if (hc_hipMemcpyDtoHAsync (hashcat_ctx, dest, device_param->hip_d_pws_comp_buf + (off * sizeof (u32)), cnt * sizeof (u32), device_param->hip_stream) == -1) return -1;
+    if (hc_hipMemcpyDtoH (hashcat_ctx, dest, device_param->hip_d_pws_comp_buf + (off * sizeof (u32)), cnt * sizeof (u32)) == -1) return -1;
 
     if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
 
@@ -1199,12 +1199,12 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
           if (device_param->is_cuda == true)
           {
-            if (hc_cuMemcpyDtoDAsync (hashcat_ctx, device_param->cuda_d_bfs_c, device_param->cuda_d_tm_c, size_tm, device_param->cuda_stream) == -1) return -1;
+            if (hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_bfs_c, device_param->cuda_d_tm_c, size_tm) == -1) return -1;
           }
 
           if (device_param->is_hip == true)
           {
-            if (hc_hipMemcpyDtoDAsync (hashcat_ctx, device_param->hip_d_bfs_c, device_param->hip_d_tm_c, size_tm, device_param->hip_stream) == -1) return -1;
+            if (hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_bfs_c, device_param->hip_d_tm_c, size_tm) == -1) return -1;
           }
 
           #if defined (__APPLE__)
@@ -1309,12 +1309,12 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
     {
       if (device_param->is_cuda == true)
       {
-        if (hc_cuMemcpyDtoDAsync (hashcat_ctx, device_param->cuda_d_pws_buf, device_param->cuda_d_pws_amp_buf, pws_cnt * sizeof (pw_t), device_param->cuda_stream) == -1) return -1;
+        if (hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_pws_buf, device_param->cuda_d_pws_amp_buf, pws_cnt * sizeof (pw_t)) == -1) return -1;
       }
 
       if (device_param->is_hip == true)
       {
-        if (hc_hipMemcpyDtoDAsync (hashcat_ctx, device_param->hip_d_pws_buf, device_param->hip_d_pws_amp_buf, pws_cnt * sizeof (pw_t), device_param->hip_stream) == -1) return -1;
+        if (hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_pws_buf, device_param->hip_d_pws_amp_buf, pws_cnt * sizeof (pw_t)) == -1) return -1;
       }
 
       #if defined (__APPLE__)
@@ -1373,14 +1373,14 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
         if (device_param->is_cuda == true)
         {
-          if (hc_cuMemcpyDtoHAsync (hashcat_ctx, device_param->hooks_buf, device_param->cuda_d_hooks, pws_cnt * hashconfig->hook_size, device_param->cuda_stream) == -1) return -1;
+          if (hc_cuMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->cuda_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1;
 
           if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1;
         }
 
         if (device_param->is_hip == true)
         {
-          if (hc_hipMemcpyDtoHAsync (hashcat_ctx, device_param->hooks_buf, device_param->hip_d_hooks, pws_cnt * hashconfig->hook_size, device_param->hip_stream) == -1) return -1;
+          if (hc_hipMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->hip_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1;
 
           if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
         }
@@ -1432,12 +1432,12 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
         if (device_param->is_cuda == true)
         {
-          if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_hooks, device_param->hooks_buf, pws_cnt * hashconfig->hook_size, device_param->cuda_stream) == -1) return -1;
+          if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_hooks, device_param->hooks_buf, pws_cnt * hashconfig->hook_size) == -1) return -1;
         }
 
         if (device_param->is_hip == true)
         {
-          if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_hooks, device_param->hooks_buf, pws_cnt * hashconfig->hook_size, device_param->hip_stream) == -1) return -1;
+          if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_hooks, device_param->hooks_buf, pws_cnt * hashconfig->hook_size) == -1) return -1;
         }
 
         #if defined (__APPLE__)
@@ -1534,14 +1534,14 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
           {
             if (device_param->is_cuda == true)
             {
-              if (hc_cuMemcpyDtoHAsync (hashcat_ctx, device_param->h_tmps, device_param->cuda_d_tmps, pws_cnt * hashconfig->tmp_size, device_param->cuda_stream) == -1) return -1;
+              if (hc_cuMemcpyDtoH (hashcat_ctx, device_param->h_tmps, device_param->cuda_d_tmps, pws_cnt * hashconfig->tmp_size) == -1) return -1;
 
               if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1;
             }
 
             if (device_param->is_hip == true)
             {
-              if (hc_hipMemcpyDtoHAsync (hashcat_ctx, device_param->h_tmps, device_param->hip_d_tmps, pws_cnt * hashconfig->tmp_size, device_param->hip_stream) == -1) return -1;
+              if (hc_hipMemcpyDtoH (hashcat_ctx, device_param->h_tmps, device_param->hip_d_tmps, pws_cnt * hashconfig->tmp_size) == -1) return -1;
 
               if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
             }
@@ -1563,14 +1563,14 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
             if (device_param->is_cuda == true)
             {
-              if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_tmps, device_param->h_tmps, pws_cnt * hashconfig->tmp_size, device_param->cuda_stream) == -1) return -1;
+              if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_tmps, device_param->h_tmps, pws_cnt * hashconfig->tmp_size) == -1) return -1;
 
               if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1;
             }
 
             if (device_param->is_hip == true)
             {
-              if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_tmps, device_param->h_tmps, pws_cnt * hashconfig->tmp_size, device_param->hip_stream) == -1) return -1;
+              if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_tmps, device_param->h_tmps, pws_cnt * hashconfig->tmp_size) == -1) return -1;
 
               if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
             }
@@ -1625,14 +1625,14 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
             if (device_param->is_cuda == true)
             {
-              if (hc_cuMemcpyDtoHAsync (hashcat_ctx, device_param->hooks_buf, device_param->cuda_d_hooks, pws_cnt * hashconfig->hook_size, device_param->cuda_stream) == -1) return -1;
+              if (hc_cuMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->cuda_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1;
 
               if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1;
             }
 
             if (device_param->is_hip == true)
             {
-              if (hc_hipMemcpyDtoHAsync (hashcat_ctx, device_param->hooks_buf, device_param->hip_d_hooks, pws_cnt * hashconfig->hook_size, device_param->hip_stream) == -1) return -1;
+              if (hc_hipMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->hip_d_hooks, pws_cnt * hashconfig->hook_size) == -1) return -1;
 
               if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
             }
@@ -1684,12 +1684,12 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
             if (device_param->is_cuda == true)
             {
-              if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_hooks, device_param->hooks_buf, pws_cnt * hashconfig->hook_size, device_param->cuda_stream) == -1) return -1;
+              if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_hooks, device_param->hooks_buf, pws_cnt * hashconfig->hook_size) == -1) return -1;
             }
 
             if (device_param->is_hip == true)
             {
-              if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_hooks, device_param->hooks_buf, pws_cnt * hashconfig->hook_size, device_param->hip_stream) == -1) return -1;
+              if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_hooks, device_param->hooks_buf, pws_cnt * hashconfig->hook_size) == -1) return -1;
             }
 
             #if defined (__APPLE__)
@@ -1774,14 +1774,14 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
           {
             if (device_param->is_cuda == true)
             {
-              if (hc_cuMemcpyDtoHAsync (hashcat_ctx, device_param->h_tmps, device_param->cuda_d_tmps, pws_cnt * hashconfig->tmp_size, device_param->cuda_stream) == -1) return -1;
+              if (hc_cuMemcpyDtoH (hashcat_ctx, device_param->h_tmps, device_param->cuda_d_tmps, pws_cnt * hashconfig->tmp_size) == -1) return -1;
 
               if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1;
             }
 
             if (device_param->is_hip == true)
             {
-              if (hc_hipMemcpyDtoHAsync (hashcat_ctx, device_param->h_tmps, device_param->hip_d_tmps, pws_cnt * hashconfig->tmp_size, device_param->hip_stream) == -1) return -1;
+              if (hc_hipMemcpyDtoH (hashcat_ctx, device_param->h_tmps, device_param->hip_d_tmps, pws_cnt * hashconfig->tmp_size) == -1) return -1;
 
               if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
             }
@@ -1803,14 +1803,14 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
             if (device_param->is_cuda == true)
             {
-              if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_tmps, device_param->h_tmps, pws_cnt * hashconfig->tmp_size, device_param->cuda_stream) == -1) return -1;
+              if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_tmps, device_param->h_tmps, pws_cnt * hashconfig->tmp_size) == -1) return -1;
 
               if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1;
             }
 
             if (device_param->is_hip == true)
             {
-              if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_tmps, device_param->h_tmps, pws_cnt * hashconfig->tmp_size, device_param->hip_stream) == -1) return -1;
+              if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_tmps, device_param->h_tmps, pws_cnt * hashconfig->tmp_size) == -1) return -1;
 
               if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
             }
@@ -2059,12 +2059,12 @@ int run_cuda_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t
   return 0;
 }
 
-int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 offset, const u8 value, const u64 size)
+int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, MAYBE_UNUSED hc_device_param_t *device_param, CUdeviceptr buf, const u64 offset, const u8 value, const u64 size)
 {
-  return hc_cuMemsetD8Async (hashcat_ctx, buf + offset, value, size, device_param->cuda_stream);
+  return hc_cuMemsetD8 (hashcat_ctx, buf + offset, value, size);
 }
 
-int run_cuda_kernel_memset32 (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 offset, const u32 value, const u64 size)
+int run_cuda_kernel_memset32 (hashcat_ctx_t *hashcat_ctx, MAYBE_UNUSED hc_device_param_t *device_param, CUdeviceptr buf, const u64 offset, const u32 value, const u64 size)
 {
   /* check that the size is multiple of element size */
   if (size % 4 != 0)
@@ -2072,7 +2072,7 @@ int run_cuda_kernel_memset32 (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *dev
     return CUDA_ERROR_INVALID_VALUE;
   }
 
-  return hc_cuMemsetD32Async (hashcat_ctx, buf + offset, value, size / 4, device_param->cuda_stream);
+  return hc_cuMemsetD32 (hashcat_ctx, buf + offset, value, size / 4);
 }
 
 int run_cuda_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 size)
@@ -2096,7 +2096,7 @@ int run_cuda_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device
 
   if (num16m)
   {
-    if (hc_cuMemcpyHtoDAsync (hashcat_ctx, buf + (num16d * 16), bzeros, num16m, device_param->cuda_stream) == -1) return -1;
+    if (hc_cuMemcpyHtoD (hashcat_ctx, buf + (num16d * 16), bzeros, num16m) == -1) return -1;
   }
 
   return 0;
@@ -2138,12 +2138,12 @@ int run_hip_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t
   return 0;
 }
 
-int run_hip_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 offset, const u8  value, const u64 size)
+int run_hip_kernel_memset (hashcat_ctx_t *hashcat_ctx, MAYBE_UNUSED hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 offset, const u8  value, const u64 size)
 {
-  return hc_hipMemsetD8Async (hashcat_ctx, buf + offset, value, size, device_param->hip_stream);
+  return hc_hipMemsetD8 (hashcat_ctx, buf + offset, value, size);
 }
 
-int run_hip_kernel_memset32 (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 offset, const u32 value, const u64 size)
+int run_hip_kernel_memset32 (hashcat_ctx_t *hashcat_ctx, MAYBE_UNUSED hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 offset, const u32 value, const u64 size)
 {
   /* check that the size is multiple of element size */
   if (size % 4 != 0)
@@ -2151,7 +2151,7 @@ int run_hip_kernel_memset32 (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *devi
     return hipErrorInvalidValue;
   }
 
-  return hc_hipMemsetD32Async (hashcat_ctx, buf + offset, value, size / 4, device_param->hip_stream);
+  return hc_hipMemsetD32 (hashcat_ctx, buf + offset, value, size / 4);
 }
 
 int run_hip_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, hipDeviceptr_t buf, const u64 size)
@@ -2175,7 +2175,7 @@ int run_hip_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_
 
   if (num16m)
   {
-    if (hc_hipMemcpyHtoDAsync (hashcat_ctx, buf + (num16d * 16), bzeros, num16m, device_param->hip_stream) == -1) return -1;
+    if (hc_hipMemcpyHtoD (hashcat_ctx, buf + (num16d * 16), bzeros, num16m) == -1) return -1;
   }
 
   return 0;
@@ -2466,7 +2466,7 @@ int run_opencl_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *devi
     }
     else
     {
-      if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, buf, CL_FALSE, num16d * 16, num16m, bzeros, 0, NULL, NULL) == -1) return -1;
+      if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, buf, CL_TRUE, num16d * 16, num16m, bzeros, 0, NULL, NULL) == -1) return -1;
     }
   }
 
@@ -2592,7 +2592,7 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
       case KERN_RUN_AUX4:   cuda_function = device_param->cuda_function_aux4;   break;
     }
 
-    if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_kernel_param, &device_param->kernel_param, device_param->size_kernel_params, device_param->cuda_stream) == -1) return -1;
+    if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_kernel_param, &device_param->kernel_param, device_param->size_kernel_params) == -1) return -1;
 
     if (hc_cuFuncSetAttribute (hashcat_ctx, cuda_function, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, dynamic_shared_mem) == -1) return -1;
 
@@ -2711,7 +2711,7 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
       case KERN_RUN_AUX4:   hip_function = device_param->hip_function_aux4;   break;
     }
 
-    if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_kernel_param, &device_param->kernel_param, device_param->size_kernel_params, device_param->hip_stream) == -1) return -1;
+    if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_kernel_param, &device_param->kernel_param, device_param->size_kernel_params) == -1) return -1;
 
     //if (hc_hipFuncSetAttribute (hashcat_ctx, hip_function, HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, dynamic_shared_mem) == -1) return -1;
 
@@ -2982,7 +2982,7 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
       if (hc_clSetKernelArg (hashcat_ctx, opencl_kernel, i, sizeof (cl_mem), device_param->kernel_params[i]) == -1) return -1;
     }
 
-    if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_kernel_param, CL_FALSE, 0, device_param->size_kernel_params, &device_param->kernel_param, 0, NULL, NULL) == -1) return -1;
+    if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_kernel_param, CL_TRUE, 0, device_param->size_kernel_params, &device_param->kernel_param, 0, NULL, NULL) == -1) return -1;
 
     /*
     for (u32 i = 24; i <= 34; i++)
@@ -3616,7 +3616,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
   {
     if (device_param->is_cuda == true)
     {
-      if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t), device_param->cuda_stream) == -1) return -1;
+      if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1;
 
       const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
 
@@ -3624,13 +3624,13 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
       if (off)
       {
-        if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32), device_param->cuda_stream) == -1) return -1;
+        if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32)) == -1) return -1;
       }
     }
 
     if (device_param->is_hip == true)
     {
-      if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t), device_param->hip_stream) == -1) return -1;
+      if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1;
 
       const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
 
@@ -3638,7 +3638,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
       if (off)
       {
-        if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32), device_param->hip_stream) == -1) return -1;
+        if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32)) == -1) return -1;
       }
     }
 
@@ -3660,7 +3660,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
     if (device_param->is_opencl == true)
     {
-      if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_FALSE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1;
+      if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1;
 
       const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
 
@@ -3668,7 +3668,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
       if (off)
       {
-        if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_comp_buf, CL_FALSE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL) == -1) return -1;
+        if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL) == -1) return -1;
       }
     }
 
@@ -3680,7 +3680,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
     {
       if (device_param->is_cuda == true)
       {
-        if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t), device_param->cuda_stream) == -1) return -1;
+        if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1;
 
         const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
 
@@ -3688,13 +3688,13 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
         if (off)
         {
-          if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32), device_param->cuda_stream) == -1) return -1;
+          if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32)) == -1) return -1;
         }
       }
 
       if (device_param->is_hip == true)
       {
-        if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t), device_param->hip_stream) == -1) return -1;
+        if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1;
 
         const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
 
@@ -3702,7 +3702,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
         if (off)
         {
-          if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32), device_param->hip_stream) == -1) return -1;
+          if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32)) == -1) return -1;
         }
       }
 
@@ -3724,7 +3724,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
       if (device_param->is_opencl == true)
       {
-        if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_FALSE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1;
+        if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1;
 
         const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
 
@@ -3732,7 +3732,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
         if (off)
         {
-          if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_comp_buf, CL_FALSE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL) == -1) return -1;
+          if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL) == -1) return -1;
         }
       }
 
@@ -3778,7 +3778,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
         if (device_param->is_cuda == true)
         {
-          if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t), device_param->cuda_stream) == -1) return -1;
+          if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1;
 
           const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
 
@@ -3786,13 +3786,13 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
           if (off)
           {
-            if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32), device_param->cuda_stream) == -1) return -1;
+            if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32)) == -1) return -1;
           }
         }
 
         if (device_param->is_hip == true)
         {
-          if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t), device_param->hip_stream) == -1) return -1;
+          if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1;
 
           const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
 
@@ -3800,7 +3800,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
           if (off)
           {
-            if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32), device_param->hip_stream) == -1) return -1;
+            if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32)) == -1) return -1;
           }
         }
 
@@ -3822,7 +3822,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
         if (device_param->is_opencl == true)
         {
-          if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_FALSE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1;
+          if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1;
 
           const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
 
@@ -3830,7 +3830,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
           if (off)
           {
-            if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_comp_buf, CL_FALSE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL) == -1) return -1;
+            if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL) == -1) return -1;
           }
         }
 
@@ -3842,7 +3842,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
         {
           if (device_param->is_cuda == true)
           {
-            if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t), device_param->cuda_stream) == -1) return -1;
+            if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1;
 
             const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
 
@@ -3850,13 +3850,13 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
             if (off)
             {
-              if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32), device_param->cuda_stream) == -1) return -1;
+              if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32)) == -1) return -1;
             }
           }
 
           if (device_param->is_hip == true)
           {
-            if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t), device_param->hip_stream) == -1) return -1;
+            if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1;
 
             const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
 
@@ -3864,7 +3864,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
             if (off)
             {
-              if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32), device_param->hip_stream) == -1) return -1;
+              if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32)) == -1) return -1;
             }
           }
 
@@ -3886,7 +3886,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
           if (device_param->is_opencl == true)
           {
-            if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_FALSE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1;
+            if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1;
 
             const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
 
@@ -3894,7 +3894,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
             if (off)
             {
-              if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_comp_buf, CL_FALSE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL) == -1) return -1;
+              if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL) == -1) return -1;
             }
           }
 
@@ -3904,7 +3904,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
         {
           if (device_param->is_cuda == true)
           {
-            if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t), device_param->cuda_stream) == -1) return -1;
+            if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1;
 
             const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
 
@@ -3912,13 +3912,13 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
             if (off)
             {
-              if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32), device_param->cuda_stream) == -1) return -1;
+              if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32)) == -1) return -1;
             }
           }
 
           if (device_param->is_hip == true)
           {
-            if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t), device_param->hip_stream) == -1) return -1;
+            if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_idx, device_param->pws_idx, pws_cnt * sizeof (pw_idx_t)) == -1) return -1;
 
             const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
 
@@ -3926,7 +3926,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
             if (off)
             {
-              if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32), device_param->hip_stream) == -1) return -1;
+              if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_comp_buf, device_param->pws_comp, off * sizeof (u32)) == -1) return -1;
             }
           }
 
@@ -3948,7 +3948,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
           if (device_param->is_opencl == true)
           {
-            if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_FALSE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1;
+            if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_idx, CL_TRUE, 0, pws_cnt * sizeof (pw_idx_t), device_param->pws_idx, 0, NULL, NULL) == -1) return -1;
 
             const pw_idx_t *pw_idx = device_param->pws_idx + pws_cnt;
 
@@ -3956,7 +3956,7 @@ int run_copy (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const
 
             if (off)
             {
-              if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_comp_buf, CL_FALSE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL) == -1) return -1;
+              if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_comp_buf, CL_TRUE, 0, off * sizeof (u32), device_param->pws_comp, 0, NULL, NULL) == -1) return -1;
             }
           }
 
@@ -4190,12 +4190,12 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
         {
           if (device_param->is_cuda == true)
           {
-            if (hc_cuMemcpyDtoDAsync (hashcat_ctx, device_param->cuda_d_rules_c, device_param->cuda_d_rules + (innerloop_pos * sizeof (kernel_rule_t)), innerloop_left * sizeof (kernel_rule_t), device_param->cuda_stream) == -1) return -1;
+            if (hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_rules_c, device_param->cuda_d_rules + (innerloop_pos * sizeof (kernel_rule_t)), innerloop_left * sizeof (kernel_rule_t)) == -1) return -1;
           }
 
           if (device_param->is_hip == true)
           {
-            if (hc_hipMemcpyDtoDAsync (hashcat_ctx, device_param->hip_d_rules_c, device_param->hip_d_rules + (innerloop_pos * sizeof (kernel_rule_t)), innerloop_left * sizeof (kernel_rule_t), device_param->hip_stream) == -1) return -1;
+            if (hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_rules_c, device_param->hip_d_rules + (innerloop_pos * sizeof (kernel_rule_t)), innerloop_left * sizeof (kernel_rule_t)) == -1) return -1;
           }
 
           #if defined (__APPLE__)
@@ -4322,12 +4322,12 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
 
               if (device_param->is_cuda == true)
               {
-                if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_combs_c, device_param->combs_buf, innerloop_left * sizeof (pw_t), device_param->cuda_stream) == -1) return -1;
+                if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_combs_c, device_param->combs_buf, innerloop_left * sizeof (pw_t)) == -1) return -1;
               }
 
               if (device_param->is_hip == true)
               {
-                if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_combs_c, device_param->combs_buf, innerloop_left * sizeof (pw_t), device_param->hip_stream) == -1) return -1;
+                if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_combs_c, device_param->combs_buf, innerloop_left * sizeof (pw_t)) == -1) return -1;
               }
 
               #if defined (__APPLE__)
@@ -4339,7 +4339,7 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
 
               if (device_param->is_opencl == true)
               {
-                if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_FALSE, 0, innerloop_left * sizeof (pw_t), device_param->combs_buf, 0, NULL, NULL) == -1) return -1;
+                if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_TRUE, 0, innerloop_left * sizeof (pw_t), device_param->combs_buf, 0, NULL, NULL) == -1) return -1;
               }
             }
             else if (user_options->attack_mode == ATTACK_MODE_HYBRID1)
@@ -4352,12 +4352,12 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
 
               if (device_param->is_cuda == true)
               {
-                if (hc_cuMemcpyDtoDAsync (hashcat_ctx, device_param->cuda_d_combs_c, device_param->cuda_d_combs, innerloop_left * sizeof (pw_t), device_param->cuda_stream) == -1) return -1;
+                if (hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_combs_c, device_param->cuda_d_combs, innerloop_left * sizeof (pw_t)) == -1) return -1;
               }
 
               if (device_param->is_hip == true)
               {
-                if (hc_hipMemcpyDtoDAsync (hashcat_ctx, device_param->hip_d_combs_c, device_param->hip_d_combs, innerloop_left * sizeof (pw_t), device_param->hip_stream) == -1) return -1;
+                if (hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_combs_c, device_param->hip_d_combs, innerloop_left * sizeof (pw_t)) == -1) return -1;
               }
 
               #if defined (__APPLE__)
@@ -4382,12 +4382,12 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
 
               if (device_param->is_cuda == true)
               {
-                if (hc_cuMemcpyDtoDAsync (hashcat_ctx, device_param->cuda_d_combs_c, device_param->cuda_d_combs, innerloop_left * sizeof (pw_t), device_param->cuda_stream) == -1) return -1;
+                if (hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_combs_c, device_param->cuda_d_combs, innerloop_left * sizeof (pw_t)) == -1) return -1;
               }
 
               if (device_param->is_hip == true)
               {
-                if (hc_hipMemcpyDtoDAsync (hashcat_ctx, device_param->hip_d_combs_c, device_param->hip_d_combs, innerloop_left * sizeof (pw_t), device_param->hip_stream) == -1) return -1;
+                if (hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_combs_c, device_param->hip_d_combs, innerloop_left * sizeof (pw_t)) == -1) return -1;
               }
 
               #if defined (__APPLE__)
@@ -4515,12 +4515,12 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
 
               if (device_param->is_cuda == true)
               {
-                if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_combs_c, device_param->combs_buf, innerloop_left * sizeof (pw_t), device_param->cuda_stream) == -1) return -1;
+                if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_combs_c, device_param->combs_buf, innerloop_left * sizeof (pw_t)) == -1) return -1;
               }
 
               if (device_param->is_hip == true)
               {
-                if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_combs_c, device_param->combs_buf, innerloop_left * sizeof (pw_t), device_param->hip_stream) == -1) return -1;
+                if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_combs_c, device_param->combs_buf, innerloop_left * sizeof (pw_t)) == -1) return -1;
               }
 
               #if defined (__APPLE__)
@@ -4532,7 +4532,7 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
 
               if (device_param->is_opencl == true)
               {
-                if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_FALSE, 0, innerloop_left * sizeof (pw_t), device_param->combs_buf, 0, NULL, NULL) == -1) return -1;
+                if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_TRUE, 0, innerloop_left * sizeof (pw_t), device_param->combs_buf, 0, NULL, NULL) == -1) return -1;
               }
             }
             else if (user_options->attack_mode == ATTACK_MODE_HYBRID1)
@@ -4545,12 +4545,12 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
 
               if (device_param->is_cuda == true)
               {
-                if (hc_cuMemcpyDtoDAsync (hashcat_ctx, device_param->cuda_d_combs_c, device_param->cuda_d_combs, innerloop_left * sizeof (pw_t), device_param->cuda_stream) == -1) return -1;
+                if (hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_combs_c, device_param->cuda_d_combs, innerloop_left * sizeof (pw_t)) == -1) return -1;
               }
 
               if (device_param->is_hip == true)
               {
-                if (hc_hipMemcpyDtoDAsync (hashcat_ctx, device_param->hip_d_combs_c, device_param->hip_d_combs, innerloop_left * sizeof (pw_t), device_param->hip_stream) == -1) return -1;
+                if (hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_combs_c, device_param->hip_d_combs, innerloop_left * sizeof (pw_t)) == -1) return -1;
               }
 
               #if defined (__APPLE__)
@@ -4577,12 +4577,12 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co
 
           if (device_param->is_cuda == true)
           {
-            if (hc_cuMemcpyDtoDAsync (hashcat_ctx, device_param->cuda_d_bfs_c, device_param->cuda_d_bfs, innerloop_left * sizeof (bf_t), device_param->cuda_stream) == -1) return -1;
+            if (hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_bfs_c, device_param->cuda_d_bfs, innerloop_left * sizeof (bf_t)) == -1) return -1;
           }
 
           if (device_param->is_hip == true)
           {
-            if (hc_hipMemcpyDtoDAsync (hashcat_ctx, device_param->hip_d_bfs_c, device_param->hip_d_bfs, innerloop_left * sizeof (bf_t), device_param->hip_stream) == -1) return -1;
+            if (hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_bfs_c, device_param->hip_d_bfs, innerloop_left * sizeof (bf_t)) == -1) return -1;
           }
 
           #if defined (__APPLE__)
@@ -11863,16 +11863,16 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_st_salts_buf,   size_st_salts)           == -1) return -1;
       if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_kernel_param,   size_kernel_params)      == -1) return -1;
 
-      if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s1_a, bitmap_ctx->bitmap_s1_a, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1;
-      if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s1_b, bitmap_ctx->bitmap_s1_b, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1;
-      if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s1_c, bitmap_ctx->bitmap_s1_c, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1;
-      if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s1_d, bitmap_ctx->bitmap_s1_d, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1;
-      if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s2_a, bitmap_ctx->bitmap_s2_a, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1;
-      if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s2_b, bitmap_ctx->bitmap_s2_b, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1;
-      if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s2_c, bitmap_ctx->bitmap_s2_c, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1;
-      if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bitmap_s2_d, bitmap_ctx->bitmap_s2_d, bitmap_ctx->bitmap_size, device_param->cuda_stream) == -1) return -1;
-      if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_digests_buf, hashes->digests_buf,     size_digests,            device_param->cuda_stream) == -1) return -1;
-      if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_salt_bufs,   hashes->salts_buf,       size_salts,              device_param->cuda_stream) == -1) return -1;
+      if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_bitmap_s1_a, bitmap_ctx->bitmap_s1_a, bitmap_ctx->bitmap_size) == -1) return -1;
+      if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_bitmap_s1_b, bitmap_ctx->bitmap_s1_b, bitmap_ctx->bitmap_size) == -1) return -1;
+      if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_bitmap_s1_c, bitmap_ctx->bitmap_s1_c, bitmap_ctx->bitmap_size) == -1) return -1;
+      if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_bitmap_s1_d, bitmap_ctx->bitmap_s1_d, bitmap_ctx->bitmap_size) == -1) return -1;
+      if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_bitmap_s2_a, bitmap_ctx->bitmap_s2_a, bitmap_ctx->bitmap_size) == -1) return -1;
+      if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_bitmap_s2_b, bitmap_ctx->bitmap_s2_b, bitmap_ctx->bitmap_size) == -1) return -1;
+      if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_bitmap_s2_c, bitmap_ctx->bitmap_s2_c, bitmap_ctx->bitmap_size) == -1) return -1;
+      if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_bitmap_s2_d, bitmap_ctx->bitmap_s2_d, bitmap_ctx->bitmap_size) == -1) return -1;
+      if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_digests_buf, hashes->digests_buf,     size_digests)            == -1) return -1;
+      if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_salt_bufs,   hashes->salts_buf,       size_salts)              == -1) return -1;
 
       /**
        * special buffers
@@ -11899,7 +11899,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
             if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_rules_c, size_rules_c) == -1) return -1;
           }
 
-          if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_rules, straight_ctx->kernel_rules_buf, size_rules_src, device_param->cuda_stream) == -1) return -1;
+          if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_rules, straight_ctx->kernel_rules_buf, size_rules_src) == -1) return -1;
         }
         else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI)
         {
@@ -11934,19 +11934,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       {
         if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_esalt_bufs, size_esalts) == -1) return -1;
 
-        if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_esalt_bufs, hashes->esalts_buf, size_esalts, device_param->cuda_stream) == -1) return -1;
+        if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_esalt_bufs, hashes->esalts_buf, size_esalts) == -1) return -1;
       }
 
       if (hashconfig->st_hash != NULL)
       {
-        if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_st_digests_buf, hashes->st_digests_buf, size_st_digests, device_param->cuda_stream) == -1) return -1;
-        if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_st_salts_buf,   hashes->st_salts_buf,   size_st_salts,   device_param->cuda_stream)   == -1) return -1;
+        if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_st_digests_buf, hashes->st_digests_buf, size_st_digests) == -1) return -1;
+        if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_st_salts_buf,   hashes->st_salts_buf,   size_st_salts)   == -1) return -1;
 
         if (size_esalts)
         {
           if (hc_cuMemAlloc (hashcat_ctx, &device_param->cuda_d_st_esalts_buf, size_st_esalts) == -1) return -1;
 
-          if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_st_esalts_buf, hashes->st_esalts_buf, size_st_esalts, device_param->cuda_stream) == -1) return -1;
+          if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_st_esalts_buf, hashes->st_esalts_buf, size_st_esalts) == -1) return -1;
         }
       }
     }
@@ -11974,16 +11974,16 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_st_salts_buf,   size_st_salts)           == -1) return -1;
       if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_kernel_param,   size_kernel_params)      == -1) return -1;
 
-      if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s1_a, bitmap_ctx->bitmap_s1_a, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1;
-      if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s1_b, bitmap_ctx->bitmap_s1_b, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1;
-      if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s1_c, bitmap_ctx->bitmap_s1_c, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1;
-      if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s1_d, bitmap_ctx->bitmap_s1_d, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1;
-      if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s2_a, bitmap_ctx->bitmap_s2_a, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1;
-      if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s2_b, bitmap_ctx->bitmap_s2_b, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1;
-      if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s2_c, bitmap_ctx->bitmap_s2_c, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1;
-      if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bitmap_s2_d, bitmap_ctx->bitmap_s2_d, bitmap_ctx->bitmap_size, device_param->hip_stream) == -1) return -1;
-      if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_digests_buf, hashes->digests_buf,     size_digests,            device_param->hip_stream) == -1) return -1;
-      if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_salt_bufs,   hashes->salts_buf,       size_salts,              device_param->hip_stream) == -1) return -1;
+      if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bitmap_s1_a, bitmap_ctx->bitmap_s1_a, bitmap_ctx->bitmap_size) == -1) return -1;
+      if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bitmap_s1_b, bitmap_ctx->bitmap_s1_b, bitmap_ctx->bitmap_size) == -1) return -1;
+      if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bitmap_s1_c, bitmap_ctx->bitmap_s1_c, bitmap_ctx->bitmap_size) == -1) return -1;
+      if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bitmap_s1_d, bitmap_ctx->bitmap_s1_d, bitmap_ctx->bitmap_size) == -1) return -1;
+      if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bitmap_s2_a, bitmap_ctx->bitmap_s2_a, bitmap_ctx->bitmap_size) == -1) return -1;
+      if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bitmap_s2_b, bitmap_ctx->bitmap_s2_b, bitmap_ctx->bitmap_size) == -1) return -1;
+      if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bitmap_s2_c, bitmap_ctx->bitmap_s2_c, bitmap_ctx->bitmap_size) == -1) return -1;
+      if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bitmap_s2_d, bitmap_ctx->bitmap_s2_d, bitmap_ctx->bitmap_size) == -1) return -1;
+      if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_digests_buf, hashes->digests_buf,     size_digests)            == -1) return -1;
+      if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_salt_bufs,   hashes->salts_buf,       size_salts)              == -1) return -1;
 
       /**
        * special buffers
@@ -12010,7 +12010,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
             if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_rules_c, size_rules_c) == -1) return -1;
           }
 
-          if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_rules, straight_ctx->kernel_rules_buf, size_rules_src, device_param->hip_stream) == -1) return -1;
+          if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_rules, straight_ctx->kernel_rules_buf, size_rules_src) == -1) return -1;
         }
         else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI)
         {
@@ -12045,19 +12045,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       {
         if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_esalt_bufs, size_esalts) == -1) return -1;
 
-        if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_esalt_bufs, hashes->esalts_buf, size_esalts, device_param->hip_stream) == -1) return -1;
+        if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_esalt_bufs, hashes->esalts_buf, size_esalts) == -1) return -1;
       }
 
       if (hashconfig->st_hash != NULL)
       {
-        if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_st_digests_buf, hashes->st_digests_buf, size_st_digests, device_param->hip_stream) == -1) return -1;
-        if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_st_salts_buf,   hashes->st_salts_buf,   size_st_salts,   device_param->hip_stream) == -1) return -1;
+        if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_st_digests_buf, hashes->st_digests_buf, size_st_digests) == -1) return -1;
+        if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_st_salts_buf,   hashes->st_salts_buf,   size_st_salts) == -1) return -1;
 
         if (size_esalts)
         {
           if (hc_hipMemAlloc (hashcat_ctx, &device_param->hip_d_st_esalts_buf, size_st_esalts) == -1) return -1;
 
-          if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_st_esalts_buf, hashes->st_esalts_buf, size_st_esalts, device_param->hip_stream) == -1) return -1;
+          if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_st_esalts_buf, hashes->st_esalts_buf, size_st_esalts) == -1) return -1;
         }
       }
     }
@@ -12195,15 +12195,15 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_st_salts,           NULL, &device_param->opencl_d_st_salts_buf)   == -1) return -1;
       if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY,   size_kernel_params,      NULL, &device_param->opencl_d_kernel_param)   == -1) return -1;
 
-      if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_a, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_a, 0, NULL, NULL) == -1) return -1;
-      if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_b, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_b, 0, NULL, NULL) == -1) return -1;
-      if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_c, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_c, 0, NULL, NULL) == -1) return -1;
-      if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_d, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_d, 0, NULL, NULL) == -1) return -1;
-      if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_a, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_a, 0, NULL, NULL) == -1) return -1;
-      if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_b, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_b, 0, NULL, NULL) == -1) return -1;
-      if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_c, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_c, 0, NULL, NULL) == -1) return -1;
-      if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_d, CL_FALSE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_d, 0, NULL, NULL) == -1) return -1;
-      if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_digests_buf, CL_FALSE, 0, size_digests,            hashes->digests_buf,     0, NULL, NULL) == -1) return -1;
+      if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_a, CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_a, 0, NULL, NULL) == -1) return -1;
+      if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_b, CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_b, 0, NULL, NULL) == -1) return -1;
+      if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_c, CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_c, 0, NULL, NULL) == -1) return -1;
+      if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s1_d, CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s1_d, 0, NULL, NULL) == -1) return -1;
+      if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_a, CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_a, 0, NULL, NULL) == -1) return -1;
+      if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_b, CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_b, 0, NULL, NULL) == -1) return -1;
+      if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_c, CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_c, 0, NULL, NULL) == -1) return -1;
+      if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bitmap_s2_d, CL_TRUE, 0, bitmap_ctx->bitmap_size, bitmap_ctx->bitmap_s2_d, 0, NULL, NULL) == -1) return -1;
+      if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_digests_buf, CL_TRUE, 0, size_digests,            hashes->digests_buf,     0, NULL, NULL) == -1) return -1;
       if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_salt_bufs,   CL_FALSE, 0, size_salts,              hashes->salts_buf,       0, NULL, NULL) == -1) return -1;
 
       /**
@@ -12221,7 +12221,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
           if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_rules,   NULL, &device_param->opencl_d_rules)   == -1) return -1;
           if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_rules_c, NULL, &device_param->opencl_d_rules_c) == -1) return -1;
 
-          if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_rules, CL_FALSE, 0, size_rules_src, straight_ctx->kernel_rules_buf, 0, NULL, NULL) == -1) return -1;
+          if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_rules, CL_TRUE, 0, size_rules_src, straight_ctx->kernel_rules_buf, 0, NULL, NULL) == -1) return -1;
         }
         else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI)
         {
@@ -12244,7 +12244,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       {
         if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_esalts, NULL, &device_param->opencl_d_esalt_bufs) == -1) return -1;
 
-        if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_esalt_bufs, CL_FALSE, 0, size_esalts, hashes->esalts_buf, 0, NULL, NULL) == -1) return -1;
+        if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_esalt_bufs, CL_TRUE, 0, size_esalts, hashes->esalts_buf, 0, NULL, NULL) == -1) return -1;
       }
 
       if (hashconfig->st_hash != NULL)
@@ -12256,7 +12256,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
         {
           if (hc_clCreateBuffer (hashcat_ctx, device_param->opencl_context, CL_MEM_READ_ONLY, size_st_esalts, NULL, &device_param->opencl_d_st_esalts_buf) == -1) return -1;
 
-          if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_st_esalts_buf, CL_FALSE, 0, size_st_esalts, hashes->st_esalts_buf, 0, NULL, NULL) == -1) return -1;
+          if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_st_esalts_buf, CL_TRUE, 0, size_st_esalts, hashes->st_esalts_buf, 0, NULL, NULL) == -1) return -1;
         }
       }
 
@@ -17537,14 +17537,14 @@ int backend_session_update_mp (hashcat_ctx_t *hashcat_ctx)
 
     if (device_param->is_cuda == true)
     {
-      if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_root_css_buf,   mask_ctx->root_css_buf,   device_param->size_root_css,   device_param->cuda_stream)   == -1) return -1;
-      if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_markov_css_buf, mask_ctx->markov_css_buf, device_param->size_markov_css, device_param->cuda_stream) == -1) return -1;
+      if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_root_css_buf,   mask_ctx->root_css_buf,   device_param->size_root_css)   == -1) return -1;
+      if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_markov_css_buf, mask_ctx->markov_css_buf, device_param->size_markov_css) == -1) return -1;
     }
 
     if (device_param->is_hip == true)
     {
-      if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_root_css_buf,   mask_ctx->root_css_buf,   device_param->size_root_css,   device_param->hip_stream)   == -1) return -1;
-      if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_markov_css_buf, mask_ctx->markov_css_buf, device_param->size_markov_css, device_param->hip_stream) == -1) return -1;
+      if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_root_css_buf,   mask_ctx->root_css_buf,   device_param->size_root_css)   == -1) return -1;
+      if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_markov_css_buf, mask_ctx->markov_css_buf, device_param->size_markov_css) == -1) return -1;
     }
 
     #if defined (__APPLE__)
@@ -17558,7 +17558,7 @@ int backend_session_update_mp (hashcat_ctx_t *hashcat_ctx)
     if (device_param->is_opencl == true)
     {
       if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_root_css_buf,   CL_FALSE, 0, device_param->size_root_css,   mask_ctx->root_css_buf,   0, NULL, NULL) == -1) return -1;
-      if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_markov_css_buf, CL_FALSE, 0, device_param->size_markov_css, mask_ctx->markov_css_buf, 0, NULL, NULL) == -1) return -1;
+      if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_markov_css_buf, CL_TRUE,  0, device_param->size_markov_css, mask_ctx->markov_css_buf, 0, NULL, NULL) == -1) return -1;
 
       if (hc_clFlush (hashcat_ctx, device_param->opencl_command_queue) == -1) return -1;
     }
@@ -17593,14 +17593,14 @@ int backend_session_update_mp_rl (hashcat_ctx_t *hashcat_ctx, const u32 css_cnt_
 
     if (device_param->is_cuda == true)
     {
-      if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_root_css_buf,   mask_ctx->root_css_buf,   device_param->size_root_css,   device_param->cuda_stream) == -1) return -1;
-      if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_markov_css_buf, mask_ctx->markov_css_buf, device_param->size_markov_css, device_param->cuda_stream) == -1) return -1;
+      if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_root_css_buf,   mask_ctx->root_css_buf,   device_param->size_root_css)   == -1) return -1;
+      if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_markov_css_buf, mask_ctx->markov_css_buf, device_param->size_markov_css) == -1) return -1;
     }
 
     if (device_param->is_hip == true)
     {
-      if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_root_css_buf,   mask_ctx->root_css_buf,   device_param->size_root_css,   device_param->hip_stream) == -1) return -1;
-      if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_markov_css_buf, mask_ctx->markov_css_buf, device_param->size_markov_css, device_param->hip_stream) == -1) return -1;
+      if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_root_css_buf,   mask_ctx->root_css_buf,   device_param->size_root_css)   == -1) return -1;
+      if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_markov_css_buf, mask_ctx->markov_css_buf, device_param->size_markov_css) == -1) return -1;
     }
 
     #if defined (__APPLE__)
@@ -17614,7 +17614,7 @@ int backend_session_update_mp_rl (hashcat_ctx_t *hashcat_ctx, const u32 css_cnt_
     if (device_param->is_opencl == true)
     {
       if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_root_css_buf,   CL_FALSE, 0, device_param->size_root_css,   mask_ctx->root_css_buf,   0, NULL, NULL) == -1) return -1;
-      if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_markov_css_buf, CL_FALSE, 0, device_param->size_markov_css, mask_ctx->markov_css_buf, 0, NULL, NULL) == -1) return -1;
+      if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_markov_css_buf, CL_TRUE,  0, device_param->size_markov_css, mask_ctx->markov_css_buf, 0, NULL, NULL) == -1) return -1;
 
       if (hc_clFlush (hashcat_ctx, device_param->opencl_command_queue) == -1) return -1;
     }
diff --git a/src/ext_cuda.c b/src/ext_cuda.c
index 4c0db9ffb..878977ade 100644
--- a/src/ext_cuda.c
+++ b/src/ext_cuda.c
@@ -87,14 +87,19 @@ int cuda_init (void *hashcat_ctx)
   HC_LOAD_FUNC_CUDA (cuda, cuLaunchKernel,           cuLaunchKernel,            CUDA_CULAUNCHKERNEL,            CUDA, 1);
   HC_LOAD_FUNC_CUDA (cuda, cuMemAlloc,               cuMemAlloc_v2,             CUDA_CUMEMALLOC,                CUDA, 1);
   HC_LOAD_FUNC_CUDA (cuda, cuMemAllocHost,           cuMemAllocHost_v2,         CUDA_CUMEMALLOCHOST,            CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuMemcpyDtoD,             cuMemcpyDtoD_v2,           CUDA_CUMEMCPYDTOD,              CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuMemcpyDtoH,             cuMemcpyDtoH_v2,           CUDA_CUMEMCPYDTOH,              CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuMemcpyHtoD,             cuMemcpyHtoD_v2,           CUDA_CUMEMCPYHTOD,              CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuMemsetD32,              cuMemsetD32,               CUDA_CUMEMSETD32,               CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuMemsetD8,               cuMemsetD8,                CUDA_CUMEMSETD8,                CUDA, 1);
   HC_LOAD_FUNC_CUDA (cuda, cuMemcpyDtoDAsync,        cuMemcpyDtoDAsync_v2,      CUDA_CUMEMCPYDTODASYNC,         CUDA, 1);
   HC_LOAD_FUNC_CUDA (cuda, cuMemcpyDtoHAsync,        cuMemcpyDtoHAsync_v2,      CUDA_CUMEMCPYDTOHASYNC,         CUDA, 1);
   HC_LOAD_FUNC_CUDA (cuda, cuMemcpyHtoDAsync,        cuMemcpyHtoDAsync_v2,      CUDA_CUMEMCPYHTODASYNC,         CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuMemsetD32Async,         cuMemsetD32Async,          CUDA_CUMEMSETD32ASYNC,          CUDA, 1);
+  HC_LOAD_FUNC_CUDA (cuda, cuMemsetD8Async,          cuMemsetD8Async,           CUDA_CUMEMSETD8ASYNC,           CUDA, 1);
   HC_LOAD_FUNC_CUDA (cuda, cuMemFree,                cuMemFree_v2,              CUDA_CUMEMFREE,                 CUDA, 1);
   HC_LOAD_FUNC_CUDA (cuda, cuMemFreeHost,            cuMemFreeHost,             CUDA_CUMEMFREEHOST,             CUDA, 1);
   HC_LOAD_FUNC_CUDA (cuda, cuMemGetInfo,             cuMemGetInfo_v2,           CUDA_CUMEMGETINFO,              CUDA, 1);
-  HC_LOAD_FUNC_CUDA (cuda, cuMemsetD32Async,         cuMemsetD32Async,          CUDA_CUMEMSETD32ASYNC,          CUDA, 1);
-  HC_LOAD_FUNC_CUDA (cuda, cuMemsetD8Async,          cuMemsetD8Async,           CUDA_CUMEMSETD8ASYNC,           CUDA, 1);
   HC_LOAD_FUNC_CUDA (cuda, cuModuleGetFunction,      cuModuleGetFunction,       CUDA_CUMODULEGETFUNCTION,       CUDA, 1);
   HC_LOAD_FUNC_CUDA (cuda, cuModuleGetGlobal,        cuModuleGetGlobal_v2,      CUDA_CUMODULEGETGLOBAL,         CUDA, 1);
   HC_LOAD_FUNC_CUDA (cuda, cuModuleLoad,             cuModuleLoad,              CUDA_CUMODULELOAD,              CUDA, 1);
@@ -517,6 +522,142 @@ int hc_cuMemFree (void *hashcat_ctx, CUdeviceptr dptr)
   return 0;
 }
 
+
+// Calls cuMemcpyDtoH through the loaded CUDA function table; returns 0 on success, -1 after logging.
+int hc_cuMemcpyDtoH (void *hashcat_ctx, void *dstHost, CUdeviceptr srcDevice, size_t ByteCount)
+{
+  hashcat_ctx_t *ctx = (hashcat_ctx_t *) hashcat_ctx;
+
+  CUDA_PTR *cuda = (CUDA_PTR *) ctx->backend_ctx->cuda;
+
+  const CUresult rc = cuda->cuMemcpyDtoH (dstHost, srcDevice, ByteCount);
+
+  // nothing to report when the driver call succeeded
+  if (rc == CUDA_SUCCESS) return 0;
+
+  const char *msg = NULL;
+
+  // log the driver's error text when it can be resolved, otherwise the numeric code
+  if (cuda->cuGetErrorString (rc, &msg) == CUDA_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "cuMemcpyDtoH(): %s", msg);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "cuMemcpyDtoH(): %d", rc);
+  }
+
+  return -1;
+}
+
+// Calls cuMemcpyDtoD through the loaded CUDA function table; returns 0 on success, -1 after logging.
+int hc_cuMemcpyDtoD (void *hashcat_ctx, CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount)
+{
+  hashcat_ctx_t *ctx = (hashcat_ctx_t *) hashcat_ctx;
+
+  CUDA_PTR *cuda = (CUDA_PTR *) ctx->backend_ctx->cuda;
+
+  const CUresult rc = cuda->cuMemcpyDtoD (dstDevice, srcDevice, ByteCount);
+
+  // nothing to report when the driver call succeeded
+  if (rc == CUDA_SUCCESS) return 0;
+
+  const char *msg = NULL;
+
+  // log the driver's error text when it can be resolved, otherwise the numeric code
+  if (cuda->cuGetErrorString (rc, &msg) == CUDA_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "cuMemcpyDtoD(): %s", msg);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "cuMemcpyDtoD(): %d", rc);
+  }
+
+  return -1;
+}
+
+// Calls cuMemcpyHtoD through the loaded CUDA function table; returns 0 on success, -1 after logging.
+int hc_cuMemcpyHtoD (void *hashcat_ctx, CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount)
+{
+  hashcat_ctx_t *ctx = (hashcat_ctx_t *) hashcat_ctx;
+
+  CUDA_PTR *cuda = (CUDA_PTR *) ctx->backend_ctx->cuda;
+
+  const CUresult rc = cuda->cuMemcpyHtoD (dstDevice, srcHost, ByteCount);
+
+  // nothing to report when the driver call succeeded
+  if (rc == CUDA_SUCCESS) return 0;
+
+  const char *msg = NULL;
+
+  // log the driver's error text when it can be resolved, otherwise the numeric code
+  if (cuda->cuGetErrorString (rc, &msg) == CUDA_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "cuMemcpyHtoD(): %s", msg);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "cuMemcpyHtoD(): %d", rc);
+  }
+
+  return -1;
+}
+
+// Calls cuMemsetD32 through the loaded CUDA function table; returns 0 on success, -1 after logging.
+int hc_cuMemsetD32 (void *hashcat_ctx, CUdeviceptr dstDevice, unsigned int ui, size_t N)
+{
+  hashcat_ctx_t *ctx = (hashcat_ctx_t *) hashcat_ctx;
+
+  CUDA_PTR *cuda = (CUDA_PTR *) ctx->backend_ctx->cuda;
+
+  const CUresult rc = cuda->cuMemsetD32 (dstDevice, ui, N);
+
+  // nothing to report when the driver call succeeded
+  if (rc == CUDA_SUCCESS) return 0;
+
+  const char *msg = NULL;
+
+  // log the driver's error text when it can be resolved, otherwise the numeric code
+  if (cuda->cuGetErrorString (rc, &msg) == CUDA_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "cuMemsetD32(): %s", msg);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "cuMemsetD32(): %d", rc);
+  }
+
+  return -1;
+}
+
+// Calls cuMemsetD8 through the loaded CUDA function table; returns 0 on success, -1 after logging.
+int hc_cuMemsetD8 (void *hashcat_ctx, CUdeviceptr dstDevice, unsigned char uc, size_t N)
+{
+  hashcat_ctx_t *ctx = (hashcat_ctx_t *) hashcat_ctx;
+
+  CUDA_PTR *cuda = (CUDA_PTR *) ctx->backend_ctx->cuda;
+
+  const CUresult rc = cuda->cuMemsetD8 (dstDevice, uc, N);
+
+  // nothing to report when the driver call succeeded
+  if (rc == CUDA_SUCCESS) return 0;
+
+  const char *msg = NULL;
+
+  // log the driver's error text when it can be resolved, otherwise the numeric code
+  if (cuda->cuGetErrorString (rc, &msg) == CUDA_SUCCESS)
+  {
+    event_log_error (hashcat_ctx, "cuMemsetD8(): %s", msg);
+  }
+  else
+  {
+    event_log_error (hashcat_ctx, "cuMemsetD8(): %d", rc);
+  }
+
+  return -1;
+}
+
 int hc_cuMemcpyDtoHAsync (void *hashcat_ctx, void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream)
 {
   backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
diff --git a/src/ext_hip.c b/src/ext_hip.c
index decfa987c..4a26a7e9f 100644
--- a/src/ext_hip.c
+++ b/src/ext_hip.c
@@ -140,12 +140,16 @@ int hip_init (void *hashcat_ctx)
   HC_LOAD_FUNC_HIP (hip, hipMemAlloc,               hipMalloc,                  HIP_HIPMEMALLOC,                HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipMemFree,                hipFree,                    HIP_HIPMEMFREE,                 HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipMemGetInfo,             hipMemGetInfo,              HIP_HIPMEMGETINFO,              HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoD,             hipMemcpyDtoD,              HIP_HIPMEMCPYDTOD,              HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoH,             hipMemcpyDtoH,              HIP_HIPMEMCPYDTOH,              HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipMemcpyHtoD,             hipMemcpyHtoD,              HIP_HIPMEMCPYHTOD,              HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipMemsetD32,              hipMemsetD32,               HIP_HIPMEMSETD32,               HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipMemsetD8,               hipMemsetD8,                HIP_HIPMEMSETD8,                HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoDAsync,        hipMemcpyDtoDAsync,         HIP_HIPMEMCPYDTODASYNC,         HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoHAsync,        hipMemcpyDtoHAsync,         HIP_HIPMEMCPYDTOHASYNC,         HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipMemcpyHtoDAsync,        hipMemcpyHtoDAsync,         HIP_HIPMEMCPYHTODASYNC,         HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipMemsetD32Async,         hipMemsetD32Async,          HIP_HIPMEMSETD32ASYNC,          HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipMemsetD8Async,          hipMemsetD8Async,           HIP_HIPMEMSETD8ASYNC,           HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipMemcpyHtoDAsync,        hipMemcpyHtoDAsync,         HIP_HIPMEMCPYHTODASYNC,         HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipModuleGetFunction,      hipModuleGetFunction,       HIP_HIPMODULEGETFUNCTION,       HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipModuleGetGlobal,        hipModuleGetGlobal,         HIP_HIPMODULEGETGLOBAL,         HIP, 1);
   HC_LOAD_FUNC_HIP (hip, hipModuleLoadDataEx,       hipModuleLoadDataEx,        HIP_HIPMODULELOADDATAEX,        HIP, 1);
@@ -800,6 +804,143 @@ int hc_hipMemGetInfo (void *hashcat_ctx, size_t *free, size_t *total)
   return 0;
 }
 
+
+
+int hc_hipMemcpyDtoH (void *hashcat_ctx, void *dstHost, hipDeviceptr_t srcDevice, size_t ByteCount)
+{
+  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
+
+  HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
+
+  const hipError_t HIP_err = hip->hipMemcpyDtoH (dstHost, srcDevice, ByteCount);
+
+  if (HIP_err != hipSuccess)
+  {
+    const char *pStr = NULL;
+
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
+    {
+      event_log_error (hashcat_ctx, "hipMemcpyDtoH(): %s", pStr);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "hipMemcpyDtoH(): %d", HIP_err);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
+int hc_hipMemcpyDtoD (void *hashcat_ctx, hipDeviceptr_t dstDevice, hipDeviceptr_t srcDevice, size_t ByteCount)
+{
+  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
+
+  HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
+
+  const hipError_t HIP_err = hip->hipMemcpyDtoD (dstDevice, srcDevice, ByteCount);
+
+  if (HIP_err != hipSuccess)
+  {
+    const char *pStr = NULL;
+
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
+    {
+      event_log_error (hashcat_ctx, "hipMemcpyDtoD(): %s", pStr);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "hipMemcpyDtoD(): %d", HIP_err);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
+int hc_hipMemcpyHtoD (void *hashcat_ctx, hipDeviceptr_t dstDevice, const void *srcHost, size_t ByteCount)
+{
+  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
+
+  HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
+
+  const hipError_t HIP_err = hip->hipMemcpyHtoD (dstDevice, srcHost, ByteCount);
+
+  if (HIP_err != hipSuccess)
+  {
+    const char *pStr = NULL;
+
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
+    {
+      event_log_error (hashcat_ctx, "hipMemcpyHtoD(): %s", pStr);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "hipMemcpyHtoD(): %d", HIP_err);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
+int hc_hipMemsetD32 (void *hashcat_ctx, hipDeviceptr_t dstDevice, unsigned int ui, size_t N)
+{
+  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
+
+  HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
+
+  const hipError_t HIP_err = hip->hipMemsetD32 (dstDevice, ui, N);
+
+  if (HIP_err != hipSuccess)
+  {
+    const char *pStr = NULL;
+
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
+    {
+      event_log_error (hashcat_ctx, "hipMemsetD32(): %s", pStr);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "hipMemsetD32(): %d", HIP_err);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
+int hc_hipMemsetD8 (void *hashcat_ctx, hipDeviceptr_t dstDevice, unsigned char uc, size_t N)
+{
+  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
+
+  HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
+
+  const hipError_t HIP_err = hip->hipMemsetD8 (dstDevice, uc, N);
+
+  if (HIP_err != hipSuccess)
+  {
+    const char *pStr = NULL;
+
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
+    {
+      event_log_error (hashcat_ctx, "hipMemsetD8(): %s", pStr);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "hipMemsetD8(): %d", HIP_err);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
 int hc_hipMemcpyDtoHAsync (void *hashcat_ctx, void *dstHost, hipDeviceptr_t srcDevice, size_t ByteCount, hipStream_t hStream)
 {
   backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
diff --git a/src/hashes.c b/src/hashes.c
index 78a6d5b90..72ab14433 100644
--- a/src/hashes.c
+++ b/src/hashes.c
@@ -334,7 +334,7 @@ int check_hash (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, pla
 
     if (device_param->is_cuda == true)
     {
-      rc = hc_cuMemcpyDtoHAsync (hashcat_ctx, tmps, device_param->cuda_d_tmps + (plain->gidvid * hashconfig->tmp_size), hashconfig->tmp_size, device_param->cuda_stream);
+      rc = hc_cuMemcpyDtoH (hashcat_ctx, tmps, device_param->cuda_d_tmps + (plain->gidvid * hashconfig->tmp_size), hashconfig->tmp_size);
 
       if (rc == 0)
       {
@@ -351,7 +351,7 @@ int check_hash (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, pla
 
     if (device_param->is_hip == true)
     {
-      rc = hc_hipMemcpyDtoHAsync (hashcat_ctx, tmps, device_param->hip_d_tmps + (plain->gidvid * hashconfig->tmp_size), hashconfig->tmp_size, device_param->hip_stream);
+      rc = hc_hipMemcpyDtoH (hashcat_ctx, tmps, device_param->hip_d_tmps + (plain->gidvid * hashconfig->tmp_size), hashconfig->tmp_size);
 
       if (rc == 0)
       {
@@ -382,7 +382,7 @@ int check_hash (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, pla
 
     if (device_param->is_opencl == true)
     {
-      rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_tmps, CL_FALSE, plain->gidvid * hashconfig->tmp_size, hashconfig->tmp_size, tmps, 0, NULL, &opencl_event);
+      rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_tmps, CL_TRUE, plain->gidvid * hashconfig->tmp_size, hashconfig->tmp_size, tmps, 0, NULL, &opencl_event);
 
       if (rc == 0)
       {
@@ -587,14 +587,14 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
 
   if (device_param->is_cuda == true)
   {
-    if (hc_cuMemcpyDtoHAsync (hashcat_ctx, &num_cracked, device_param->cuda_d_result, sizeof (u32), device_param->cuda_stream) == -1) return -1;
+    if (hc_cuMemcpyDtoH (hashcat_ctx, &num_cracked, device_param->cuda_d_result, sizeof (u32)) == -1) return -1;
 
     if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1;
   }
 
   if (device_param->is_hip == true)
   {
-    if (hc_hipMemcpyDtoHAsync (hashcat_ctx, &num_cracked, device_param->hip_d_result, sizeof (u32), device_param->hip_stream) == -1) return -1;
+    if (hc_hipMemcpyDtoH (hashcat_ctx, &num_cracked, device_param->hip_d_result, sizeof (u32)) == -1) return -1;
 
     if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
   }
@@ -624,7 +624,7 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
 
   if (device_param->is_cuda == true)
   {
-    rc = hc_cuMemcpyDtoHAsync (hashcat_ctx, cracked, device_param->cuda_d_plain_bufs, num_cracked * sizeof (plain_t), device_param->cuda_stream);
+    rc = hc_cuMemcpyDtoH (hashcat_ctx, cracked, device_param->cuda_d_plain_bufs, num_cracked * sizeof (plain_t));
 
     if (rc == 0)
     {
@@ -641,7 +641,7 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
 
   if (device_param->is_hip == true)
   {
-    rc = hc_hipMemcpyDtoHAsync (hashcat_ctx, cracked, device_param->hip_d_plain_bufs, num_cracked * sizeof (plain_t), device_param->hip_stream);
+    rc = hc_hipMemcpyDtoH (hashcat_ctx, cracked, device_param->hip_d_plain_bufs, num_cracked * sizeof (plain_t));
 
     if (rc == 0)
     {
diff --git a/src/modules/module_01300.c b/src/modules/module_01300.c
index b50a0e99c..c79a3493c 100644
--- a/src/modules/module_01300.c
+++ b/src/modules/module_01300.c
@@ -20,6 +20,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_RAW_HASH;
 static const char *HASH_NAME      = "SHA2-224";
 static const u64   KERN_TYPE      = 1300;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_PRECOMPUTE_INIT
                                   | OPTI_TYPE_EARLY_SKIP
                                   | OPTI_TYPE_NOT_ITERATED
diff --git a/src/modules/module_07100.c b/src/modules/module_07100.c
index ec3924cec..78c8f8081 100644
--- a/src/modules/module_07100.c
+++ b/src/modules/module_07100.c
@@ -85,15 +85,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
   return pw_max;
 }
 
-char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
-{
-  char *jit_build_options = NULL;
-
-  hc_asprintf (&jit_build_options, "-D NO_UNROLL");
-
-  return jit_build_options;
-}
-
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u64 *digest = (u64 *) digest_buf;
@@ -398,7 +389,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_hook23                   = MODULE_DEFAULT;
   module_ctx->module_hook_salt_size           = MODULE_DEFAULT;
   module_ctx->module_hook_size                = MODULE_DEFAULT;
-  module_ctx->module_jit_build_options        = module_jit_build_options;
+  module_ctx->module_jit_build_options        = MODULE_DEFAULT;
   module_ctx->module_jit_cache_disable        = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
diff --git a/src/modules/module_08200.c b/src/modules/module_08200.c
index 54d5c62b5..90049b1ed 100644
--- a/src/modules/module_08200.c
+++ b/src/modules/module_08200.c
@@ -20,6 +20,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_PASSWORD_MANAGER;
 static const char *HASH_NAME      = "1Password, cloudkeychain";
 static const u64   KERN_TYPE      = 8200;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_USES_BITS_64
                                   | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
diff --git a/src/selftest.c b/src/selftest.c
index d8b105d2e..5e744f88b 100644
--- a/src/selftest.c
+++ b/src/selftest.c
@@ -103,12 +103,12 @@ static int selftest_init (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_
 
     if (device_param->is_cuda == true)
     {
-      if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t), device_param->cuda_stream) == -1) return -1;
+      if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1;
     }
 
     if (device_param->is_hip == true)
     {
-      if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t), device_param->hip_stream) == -1) return -1;
+      if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1;
     }
 
     #if defined (__APPLE__)
@@ -120,7 +120,7 @@ static int selftest_init (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_
 
     if (device_param->is_opencl == true)
     {
-      if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_FALSE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, &opencl_event) == -1) return -1;
+      if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, &opencl_event) == -1) return -1;
     }
   }
   else
@@ -148,12 +148,12 @@ static int selftest_init (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_
 
         if (device_param->is_cuda == true)
         {
-          if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t), device_param->cuda_stream) == -1) return -1;
+          if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1;
         }
 
         if (device_param->is_hip == true)
         {
-          if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t), device_param->hip_stream) == -1) return -1;
+          if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1;
         }
 
         #if defined (__APPLE__)
@@ -165,7 +165,7 @@ static int selftest_init (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_
 
         if (device_param->is_opencl == true)
         {
-          if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_FALSE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, &opencl_event) == -1) return -1;
+          if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, &opencl_event) == -1) return -1;
         }
       }
       else if (user_options_extra->attack_kern == ATTACK_KERN_COMBI)
@@ -218,16 +218,16 @@ static int selftest_init (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_
 
         if (device_param->is_cuda == true)
         {
-          if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_combs_c, &comb, 1 * sizeof (pw_t), device_param->cuda_stream) == -1) return -1;
+          if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_combs_c, &comb, 1 * sizeof (pw_t)) == -1) return -1;
 
-          if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t), device_param->cuda_stream) == -1) return -1;
+          if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1;
         }
 
         if (device_param->is_hip == true)
         {
-          if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_combs_c, &comb, 1 * sizeof (pw_t), device_param->hip_stream) == -1) return -1;
+          if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_combs_c, &comb, 1 * sizeof (pw_t)) == -1) return -1;
 
-          if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t), device_param->hip_stream) == -1) return -1;
+          if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1;
         }
 
         #if defined (__APPLE__)
@@ -241,9 +241,9 @@ static int selftest_init (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_
 
         if (device_param->is_opencl == true)
         {
-          if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_FALSE, 0, 1 * sizeof (pw_t), &comb, 0, NULL, &opencl_event) == -1) return -1;
+          if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_combs_c, CL_TRUE, 0, 1 * sizeof (pw_t), &comb, 0, NULL, &opencl_event) == -1) return -1;
 
-          if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_FALSE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, &opencl_event) == -1) return -1;
+          if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, &opencl_event) == -1) return -1;
         }
       }
       else if (user_options_extra->attack_kern == ATTACK_KERN_BF)
@@ -269,12 +269,12 @@ static int selftest_init (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_
 
           if (device_param->is_cuda == true)
           {
-            if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t), device_param->cuda_stream) == -1) return -1;
+            if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1;
           }
 
           if (device_param->is_hip == true)
           {
-            if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t), device_param->hip_stream) == -1) return -1;
+            if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1;
           }
 
           #if defined (__APPLE__)
@@ -286,7 +286,7 @@ static int selftest_init (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_
 
           if (device_param->is_opencl == true)
           {
-            if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_FALSE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, &opencl_event) == -1) return -1;
+            if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, &opencl_event) == -1) return -1;
           }
         }
         else
@@ -330,12 +330,12 @@ static int selftest_init (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_
 
           if (device_param->is_cuda == true)
           {
-            if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_bfs_c, &bf, 1 * sizeof (bf_t), device_param->cuda_stream) == -1) return -1;
+            if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_bfs_c, &bf, 1 * sizeof (bf_t)) == -1) return -1;
           }
 
           if (device_param->is_hip == true)
           {
-            if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_bfs_c, &bf, 1 * sizeof (bf_t), device_param->hip_stream) == -1) return -1;
+            if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_bfs_c, &bf, 1 * sizeof (bf_t)) == -1) return -1;
           }
 
           #if defined (__APPLE__)
@@ -347,7 +347,7 @@ static int selftest_init (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_
 
           if (device_param->is_opencl == true)
           {
-            if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bfs_c, CL_FALSE, 0, 1 * sizeof (bf_t), &bf, 0, NULL, &opencl_event) == -1) return -1;
+            if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_bfs_c, CL_TRUE, 0, 1 * sizeof (bf_t), &bf, 0, NULL, &opencl_event) == -1) return -1;
           }
 
           memset (&pw, 0, sizeof (pw));
@@ -436,12 +436,12 @@ static int selftest_init (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_
 
           if (device_param->is_cuda == true)
           {
-            if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t), device_param->cuda_stream) == -1) return -1;
+            if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1;
           }
 
           if (device_param->is_hip == true)
           {
-            if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t), device_param->hip_stream) == -1) return -1;
+            if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1;
           }
 
           #if defined (__APPLE__)
@@ -453,7 +453,7 @@ static int selftest_init (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_
 
           if (device_param->is_opencl == true)
           {
-            if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_FALSE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, &opencl_event) == -1) return -1;
+            if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, &opencl_event) == -1) return -1;
           }
 
           *highest_pw_len = pw.pw_len;
@@ -474,12 +474,12 @@ static int selftest_init (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_
 
       if (device_param->is_cuda == true)
       {
-        if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t), device_param->cuda_stream) == -1) return -1;
+        if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1;
       }
 
       if (device_param->is_hip == true)
       {
-        if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t), device_param->hip_stream) == -1) return -1;
+        if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_pws_buf, &pw, 1 * sizeof (pw_t)) == -1) return -1;
       }
 
       #if defined (__APPLE__)
@@ -491,7 +491,7 @@ static int selftest_init (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_
 
       if (device_param->is_opencl == true)
       {
-        if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_FALSE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, &opencl_event) == -1) return -1;
+        if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_pws_buf, CL_TRUE, 0, 1 * sizeof (pw_t), &pw, 0, NULL, &opencl_event) == -1) return -1;
       }
     }
   }
@@ -587,14 +587,14 @@ static int selftest_run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *d
 
       if (device_param->is_cuda == true)
       {
-        if (hc_cuMemcpyDtoHAsync (hashcat_ctx, device_param->hooks_buf, device_param->cuda_d_hooks, device_param->size_hooks, device_param->cuda_stream) == -1) return -1;
+        if (hc_cuMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->cuda_d_hooks, device_param->size_hooks) == -1) return -1;
 
         if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1;
       }
 
       if (device_param->is_hip == true)
       {
-        if (hc_hipMemcpyDtoHAsync (hashcat_ctx, device_param->hooks_buf, device_param->hip_d_hooks, device_param->size_hooks, device_param->hip_stream) == -1) return -1;
+        if (hc_hipMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->hip_d_hooks, device_param->size_hooks) == -1) return -1;
 
         if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
       }
@@ -616,12 +616,12 @@ static int selftest_run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *d
 
       if (device_param->is_cuda == true)
       {
-        if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_hooks, device_param->hooks_buf, device_param->size_hooks, device_param->cuda_stream) == -1) return -1;
+        if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_hooks, device_param->hooks_buf, device_param->size_hooks) == -1) return -1;
       }
 
       if (device_param->is_hip == true)
       {
-        if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_hooks, device_param->hooks_buf, device_param->size_hooks, device_param->hip_stream) == -1) return -1;
+        if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_hooks, device_param->hooks_buf, device_param->size_hooks) == -1) return -1;
       }
 
       #if defined (__APPLE__)
@@ -633,7 +633,7 @@ static int selftest_run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *d
 
       if (device_param->is_opencl == true)
       {
-        if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_FALSE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1;
+        if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1;
       }
     }
 
@@ -683,14 +683,14 @@ static int selftest_run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *d
           {
             if (device_param->is_cuda == true)
             {
-              if (hc_cuMemcpyDtoHAsync (hashcat_ctx, device_param->h_tmps, device_param->cuda_d_tmps, hashconfig->tmp_size, device_param->cuda_stream) == -1) return -1;
+              if (hc_cuMemcpyDtoH (hashcat_ctx, device_param->h_tmps, device_param->cuda_d_tmps, hashconfig->tmp_size) == -1) return -1;
 
               if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1;
             }
 
             if (device_param->is_hip == true)
             {
-              if (hc_hipMemcpyDtoHAsync (hashcat_ctx, device_param->h_tmps, device_param->hip_d_tmps, hashconfig->tmp_size, device_param->hip_stream) == -1) return -1;
+              if (hc_hipMemcpyDtoH (hashcat_ctx, device_param->h_tmps, device_param->hip_d_tmps, hashconfig->tmp_size) == -1) return -1;
 
               if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
             }
@@ -724,14 +724,14 @@ static int selftest_run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *d
           {
             if (device_param->is_cuda == true)
             {
-              if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_tmps, device_param->h_tmps, hashconfig->tmp_size, device_param->cuda_stream) == -1) return -1;
+              if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_tmps, device_param->h_tmps, hashconfig->tmp_size) == -1) return -1;
 
               if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1;
             }
 
             if (device_param->is_hip == true)
             {
-              if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_tmps, device_param->h_tmps, hashconfig->tmp_size, device_param->hip_stream) == -1) return -1;
+              if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_tmps, device_param->h_tmps, hashconfig->tmp_size) == -1) return -1;
 
               if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
             }
@@ -758,14 +758,14 @@ static int selftest_run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *d
 
         if (device_param->is_cuda == true)
         {
-          if (hc_cuMemcpyDtoHAsync (hashcat_ctx, device_param->hooks_buf, device_param->cuda_d_hooks, device_param->size_hooks, device_param->cuda_stream) == -1) return -1;
+          if (hc_cuMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->cuda_d_hooks, device_param->size_hooks) == -1) return -1;
 
           if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1;
         }
 
         if (device_param->is_hip == true)
         {
-          if (hc_hipMemcpyDtoHAsync (hashcat_ctx, device_param->hooks_buf, device_param->hip_d_hooks, device_param->size_hooks, device_param->hip_stream) == -1) return -1;
+          if (hc_hipMemcpyDtoH (hashcat_ctx, device_param->hooks_buf, device_param->hip_d_hooks, device_param->size_hooks) == -1) return -1;
 
           if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
         }
@@ -787,12 +787,12 @@ static int selftest_run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *d
 
         if (device_param->is_cuda == true)
         {
-          if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_hooks, device_param->hooks_buf, device_param->size_hooks, device_param->cuda_stream) == -1) return -1;
+          if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_hooks, device_param->hooks_buf, device_param->size_hooks) == -1) return -1;
         }
 
         if (device_param->is_hip == true)
         {
-          if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_hooks, device_param->hooks_buf, device_param->size_hooks, device_param->hip_stream) == -1) return -1;
+          if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_hooks, device_param->hooks_buf, device_param->size_hooks) == -1) return -1;
         }
 
         #if defined (__APPLE__)
@@ -804,7 +804,7 @@ static int selftest_run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *d
 
         if (device_param->is_opencl == true)
         {
-          if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_FALSE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1;
+          if (hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_hooks, CL_TRUE, 0, device_param->size_hooks, device_param->hooks_buf, 0, NULL, NULL) == -1) return -1;
         }
       }
     }
@@ -846,14 +846,14 @@ static int selftest_run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *d
             {
               if (device_param->is_cuda == true)
               {
-                if (hc_cuMemcpyDtoHAsync (hashcat_ctx, device_param->h_tmps, device_param->cuda_d_tmps, hashconfig->tmp_size, device_param->cuda_stream) == -1) return -1;
+                if (hc_cuMemcpyDtoH (hashcat_ctx, device_param->h_tmps, device_param->cuda_d_tmps, hashconfig->tmp_size) == -1) return -1;
 
                 if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1;
               }
 
               if (device_param->is_hip == true)
               {
-                if (hc_hipMemcpyDtoHAsync (hashcat_ctx, device_param->h_tmps, device_param->hip_d_tmps, hashconfig->tmp_size, device_param->hip_stream) == -1) return -1;
+                if (hc_hipMemcpyDtoH (hashcat_ctx, device_param->h_tmps, device_param->hip_d_tmps, hashconfig->tmp_size) == -1) return -1;
 
                 if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
               }
@@ -887,12 +887,12 @@ static int selftest_run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *d
             {
               if (device_param->is_cuda == true)
               {
-                if (hc_cuMemcpyHtoDAsync (hashcat_ctx, device_param->cuda_d_tmps, device_param->h_tmps, hashconfig->tmp_size, device_param->cuda_stream) == -1) return -1;
+                if (hc_cuMemcpyHtoD (hashcat_ctx, device_param->cuda_d_tmps, device_param->h_tmps, hashconfig->tmp_size) == -1) return -1;
               }
 
               if (device_param->is_hip == true)
               {
-                if (hc_hipMemcpyHtoDAsync (hashcat_ctx, device_param->hip_d_tmps, device_param->h_tmps, hashconfig->tmp_size, device_param->hip_stream) == -1) return -1;
+                if (hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_tmps, device_param->h_tmps, hashconfig->tmp_size) == -1) return -1;
               }
 
               #if defined (__APPLE__)
@@ -962,14 +962,14 @@ static int selftest_cleanup (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *devi
 
   if (device_param->is_cuda == true)
   {
-    if (hc_cuMemcpyDtoHAsync (hashcat_ctx, num_cracked, device_param->cuda_d_result, sizeof (u32), device_param->cuda_stream) == -1) return -1;
+    if (hc_cuMemcpyDtoH (hashcat_ctx, num_cracked, device_param->cuda_d_result, sizeof (u32)) == -1) return -1;
 
     if (hc_cuEventRecord (hashcat_ctx, device_param->cuda_event3, device_param->cuda_stream) == -1) return -1;
   }
 
   if (device_param->is_hip == true)
   {
-    if (hc_hipMemcpyDtoHAsync (hashcat_ctx, num_cracked, device_param->hip_d_result, sizeof (u32), device_param->hip_stream) == -1) return -1;
+    if (hc_hipMemcpyDtoH (hashcat_ctx, num_cracked, device_param->hip_d_result, sizeof (u32)) == -1) return -1;
 
     if (hc_hipEventRecord (hashcat_ctx, device_param->hip_event3, device_param->hip_stream) == -1) return -1;
   }
@@ -983,7 +983,7 @@ static int selftest_cleanup (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *devi
 
   if (device_param->is_opencl == true)
   {
-    if (hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_result, CL_FALSE, 0, sizeof (u32), num_cracked, 0, NULL, &opencl_event) == -1) return -1;
+    if (hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_result, CL_TRUE, 0, sizeof (u32), num_cracked, 0, NULL, &opencl_event) == -1) return -1;
 
     if (hc_clFlush (hashcat_ctx, device_param->opencl_command_queue) == -1) return -1;
   }

From 696fa3b2ad741f60e71589954cce9091ae6c1736 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Mon, 30 Jun 2025 19:38:54 +0200
Subject: [PATCH 53/83] Modified the automatic kernel-accel count reduction
 routine to also reduce kernel-thread count if insufficient device or host
 memory is available. Reduced the fixed memory reservation size from 1GiB to
 64MiB as a result. Added a warning when the user sets a thread count on the
 command line higher than recommended by the runtime (based on available
 registers and shared memory). Added host-side logic to detect true funnel
 shift support and disable kernels using it if not supported on the device.
 Updated more plugins to limit register count to 128 on NVIDIA GPUs.

---
 OpenCL/inc_vendor.h        |  13 +++-
 include/types.h            |   1 +
 src/backend.c              | 119 +++++++++++++++++++++++--------------
 src/modules/module_00020.c |   1 +
 src/modules/module_00021.c |   1 +
 src/modules/module_00022.c |   1 +
 src/modules/module_00023.c |   1 +
 src/modules/module_00024.c |   1 +
 src/modules/module_01500.c |   3 +-
 9 files changed, 95 insertions(+), 46 deletions(-)

diff --git a/OpenCL/inc_vendor.h b/OpenCL/inc_vendor.h
index a52b4c899..a0df70d23 100644
--- a/OpenCL/inc_vendor.h
+++ b/OpenCL/inc_vendor.h
@@ -183,13 +183,11 @@ using namespace metal;
 #ifdef IS_CUDA
 #define USE_BITSELECT
 #define USE_ROTATE
-#define USE_FUNNELSHIFT
 #endif
 
 #ifdef IS_HIP
 #define USE_BITSELECT
 #define USE_ROTATE
-#define USE_FUNNELSHIFT
 #endif
 
 #ifdef IS_ROCM
@@ -220,7 +218,18 @@ using namespace metal;
 #define s3 w
 #endif
 
+// funnelshift is nv's bitalign starting from sm 70
+
+#ifdef IS_NV
+#if HAS_SHFW == 1
+#define USE_FUNNELSHIFT
+#endif
+#elif IS_HIP
+#define USE_FUNNELSHIFT
+#endif
+
 // some algorithms do not like this, eg 150, 1100, ...
+
 #ifdef NO_FUNNELSHIFT
 #undef USE_FUNNELSHIFT
 #endif
diff --git a/include/types.h b/include/types.h
index 600e7bd93..896ac9c42 100644
--- a/include/types.h
+++ b/include/types.h
@@ -1516,6 +1516,7 @@ typedef struct hc_device_param
   bool    has_lop3;
   bool    has_mov64;
   bool    has_prmt;
+  bool    has_shfw;
 
   double  spin_damp;
 
diff --git a/src/backend.c b/src/backend.c
index 2d34caee1..a9318be79 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -402,7 +402,8 @@ static bool is_same_device_type (const hc_device_param_t *src, const hc_device_p
   }
 
   if (src->device_processors         != dst->device_processors)         return false;
-  if (src->device_maxclock_frequency != dst->device_maxclock_frequency) return false;
+  // clocks can be different, but clocks should have no impact on workload tuning
+  // if (src->device_maxclock_frequency != dst->device_maxclock_frequency) return false;
   if (src->device_maxworkgroup_size  != dst->device_maxworkgroup_size)  return false;
 
   // memory size can be different, depending on which gpu has a monitor connected
@@ -5920,6 +5921,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
       device_param->has_lop3  = (sm >= 50) ? true : false;
       device_param->has_mov64 = (sm >= 10) ? true : false;
       device_param->has_prmt  = (sm >= 20) ? true : false;
+      device_param->has_shfw  = (sm >= 70) ? true : false;
 
       // device_available_mem
 
@@ -6397,6 +6399,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
       device_param->has_lop3  = false;
       device_param->has_mov64 = false;
       device_param->has_prmt  = false;
+      device_param->has_shfw  = false;
 
       // device_available_mem
 
@@ -6885,6 +6888,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
       device_param->has_lop3  = false;
       device_param->has_mov64 = false;
       device_param->has_prmt  = false;
+      device_param->has_shfw  = false;
 
       // check if we need skip device
 
@@ -8033,6 +8037,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
           device_param->has_lop3  = (sm >= 50) ? true : false;
           device_param->has_mov64 = (sm >= 10) ? true : false;
           device_param->has_prmt  = (sm >= 20) ? true : false;
+          device_param->has_shfw  = (sm >= 70) ? true : false;
         }
 
         // common driver check
@@ -8428,6 +8433,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
       device_param->has_lop3  = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"lop3.b32 %0, 0, 0, 0, 0;\" : \"=r\"(r)); }");                                                          \
       device_param->has_mov64 = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned long long r; unsigned int a; unsigned int b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }");  \
       device_param->has_prmt  = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"prmt.b32 %0, 0, 0, 0;\" : \"=r\"(r)); }");                                                             \
+      device_param->has_shfw  = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"shf.l.wrap.b32 %0, 0, 0, 0;\" : \"=r\"(r)); }");                                                       \
 
     if (backend_devices_idx > 0)
     {
@@ -8443,6 +8449,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
         device_param->has_lop3  = device_param_prev->has_lop3;
         device_param->has_mov64 = device_param_prev->has_mov64;
         device_param->has_prmt  = device_param_prev->has_prmt;
+        device_param->has_shfw  = device_param_prev->has_shfw;
       }
       else
       {
@@ -8717,6 +8724,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
           device_param->has_lop3  = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"lop3.b32 %0, 0, 0, 0, 0;\" : \"=r\"(r)); }");                                    \
           device_param->has_mov64 = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { ulong r; uint a; uint b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }"); \
           device_param->has_prmt  = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"prmt.b32 %0, 0, 0, 0;\" : \"=r\"(r)); }");                                       \
+          device_param->has_shfw  = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"shf.l.wrap.b32 %0, 0, 0, 0;\" : \"=r\"(r)); }");                                 \
 
         if (backend_devices_idx > 0)
         {
@@ -8732,6 +8740,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
             device_param->has_lop3  = device_param_prev->has_lop3;
             device_param->has_mov64 = device_param_prev->has_mov64;
             device_param->has_prmt  = device_param_prev->has_prmt;
+            device_param->has_shfw  = device_param_prev->has_shfw;
           }
           else
           {
@@ -11000,9 +11009,9 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
     // we don't have sm_* on vendors not NV but it doesn't matter
 
     #if defined (DEBUG)
-    build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%d -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_ADD=%u -D HAS_ADDC=%u -D HAS_SUB=%u -D HAS_SUBC=%u -D HAS_VADD=%u -D HAS_VADDC=%u -D HAS_VADD_CO=%u -D HAS_VADDC_CO=%u -D HAS_VSUB=%u -D HAS_VSUBB=%u -D HAS_VSUB_CO=%u -D HAS_VSUBB_CO=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%d -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D ATTACK_EXEC=%u -D ATTACK_KERN=%u -D ATTACK_MODE=%u ", device_param->device_local_mem_type, device_param->opencl_platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_add, device_param->has_addc, device_param->has_sub, device_param->has_subc, device_param->has_vadd, device_param->has_vaddc, device_param->has_vadd_co, device_param->has_vaddc_co, device_param->has_vsub, device_param->has_vsubb, device_param->has_vsub_co, device_param->has_vsubb_co, device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type, hashconfig->attack_exec, user_options_extra->attack_kern, user_options->attack_mode);
+    build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%d -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_ADD=%u -D HAS_ADDC=%u -D HAS_SUB=%u -D HAS_SUBC=%u -D HAS_VADD=%u -D HAS_VADDC=%u -D HAS_VADD_CO=%u -D HAS_VADDC_CO=%u -D HAS_VSUB=%u -D HAS_VSUBB=%u -D HAS_VSUB_CO=%u -D HAS_VSUBB_CO=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D HAS_SHFW=%u -D VECT_SIZE=%d -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D ATTACK_EXEC=%u -D ATTACK_KERN=%u -D ATTACK_MODE=%u ", device_param->device_local_mem_type, device_param->opencl_platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_add, device_param->has_addc, device_param->has_sub, device_param->has_subc, device_param->has_vadd, device_param->has_vaddc, device_param->has_vadd_co, device_param->has_vaddc_co, device_param->has_vsub, device_param->has_vsubb, device_param->has_vsub_co, device_param->has_vsubb_co, device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->has_shfw, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type, hashconfig->attack_exec, user_options_extra->attack_kern, user_options->attack_mode);
     #else
-    build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%d -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_ADD=%u -D HAS_ADDC=%u -D HAS_SUB=%u -D HAS_SUBC=%u -D HAS_VADD=%u -D HAS_VADDC=%u -D HAS_VADD_CO=%u -D HAS_VADDC_CO=%u -D HAS_VSUB=%u -D HAS_VSUBB=%u -D HAS_VSUB_CO=%u -D HAS_VSUBB_CO=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D VECT_SIZE=%d -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D ATTACK_EXEC=%u -D ATTACK_KERN=%u -D ATTACK_MODE=%u -w ", device_param->device_local_mem_type, device_param->opencl_platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_add, device_param->has_addc, device_param->has_sub, device_param->has_subc, device_param->has_vadd, device_param->has_vaddc, device_param->has_vadd_co, device_param->has_vaddc_co, device_param->has_vsub, device_param->has_vsubb, device_param->has_vsub_co, device_param->has_vsubb_co, device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type, hashconfig->attack_exec, user_options_extra->attack_kern, user_options->attack_mode);
+    build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%d -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_ADD=%u -D HAS_ADDC=%u -D HAS_SUB=%u -D HAS_SUBC=%u -D HAS_VADD=%u -D HAS_VADDC=%u -D HAS_VADD_CO=%u -D HAS_VADDC_CO=%u -D HAS_VSUB=%u -D HAS_VSUBB=%u -D HAS_VSUB_CO=%u -D HAS_VSUBB_CO=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D HAS_BFE=%u -D HAS_LOP3=%u -D HAS_MOV64=%u -D HAS_PRMT=%u -D HAS_SHFW=%u -D VECT_SIZE=%d -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D ATTACK_EXEC=%u -D ATTACK_KERN=%u -D ATTACK_MODE=%u -w ", device_param->device_local_mem_type, device_param->opencl_platform_vendor_id, (device_param->sm_major * 100) + (device_param->sm_minor * 10), device_param->has_add, device_param->has_addc, device_param->has_sub, device_param->has_subc, device_param->has_vadd, device_param->has_vaddc, device_param->has_vadd_co, device_param->has_vaddc_co, device_param->has_vsub, device_param->has_vsubb, device_param->has_vsub_co, device_param->has_vsubb_co, device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->has_bfe, device_param->has_lop3, device_param->has_mov64, device_param->has_prmt, device_param->has_shfw, device_param->vector_width, (u32) device_param->opencl_device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type, hashconfig->attack_exec, user_options_extra->attack_kern, user_options->attack_mode);
     #endif
 
     build_options_buf[build_options_len] = 0;
@@ -16071,6 +16080,14 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       threads_per_block = device_param->kernel_preferred_wgs_multiple;
     }
 
+    if (user_options->kernel_threads_chgd == true)
+    {
+      if (threads_per_block < user_options->kernel_threads)
+      {
+        event_log_warning (hashcat_ctx, "* Device #%u: The requested thread size '%d' exceeds the recommended limit of the backend runtime '%d'.", device_id + 1, user_options->kernel_threads, threads_per_block);
+      }
+    }
+
     if ((threads_per_block >= device_param->kernel_threads_min) && (threads_per_block <= device_param->kernel_threads_max))
     {
       //printf ("auto thread max: %d\n", threads_per_block);
@@ -16126,11 +16143,12 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       device_param->overtune_unfriendly = true;
     }
 
-    //    device_param->kernel_threads = kernel_threads;
-    device_param->kernel_threads = 0;
 
-    const u32 hardware_power_max = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE)     ? 1 : device_param->device_processors)
-                                 * ((hashconfig->opts_type & OPTS_TYPE_THREAD_MULTI_DISABLE) ? 1 : device_param->kernel_threads_max);
+    device_param->kernel_threads = 0;
+    device_param->kernel_accel = 0;
+
+    u32 kernel_threads_min = device_param->kernel_threads_min;
+    u32 kernel_threads_max = device_param->kernel_threads_max;
 
     u32 kernel_accel_min = device_param->kernel_accel_min;
     u32 kernel_accel_max = device_param->kernel_accel_max;
@@ -16179,27 +16197,29 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       if (device_param->is_hip    == true) local_size_bytes = hip_query_max_local_size_bytes    (hashcat_ctx, device_param);
       if (device_param->is_opencl == true) local_size_bytes = opencl_query_max_local_size_bytes (hashcat_ctx, device_param);
       // metal todo
-
-      // use this parameter to tune down kernel_accel_max, because it has such a huge impact on memory requirement
-      // let's target a maximum use of memory of 8GiB so that there's some room left for other stuff
-
-      if (local_size_bytes)
-      {
-        const u64 SIZE_8GiB = 8ULL * 1024 * 1024 * 1024;
-
-        const u64 max_accel = SIZE_8GiB / (hardware_power_max * local_size_bytes);
-
-        kernel_accel_max = MIN (kernel_accel_max, max_accel);
-      }
     }
 
     const u64 size_device_extra1234 = size_extra_buffer1 + size_extra_buffer2 + size_extra_buffer3 + size_extra_buffer4;
 
-    const u64 size_device_extra = MAX ((1024 * 1024 * 1024), size_device_extra1234);
+    // Still not 100% sure about the 64MiB here
 
-    while (kernel_accel_max >= kernel_accel_min)
+    const u64 size_device_extra = MAX ((64ULL * 1024 * 1024), size_device_extra1234);
+
+    // we will first decrease accel and when reached that limit, we will decrease threads
+    // when we decrease limit this will restore accel_max
+
+    int memory_limit_hit = 0;
+
+    const u32 kernel_accel_max_sav = kernel_accel_max;
+
+    while ((kernel_accel_max >= kernel_accel_min) || (kernel_threads_max >= kernel_threads_min))
     {
-      const u64 kernel_power_max = hardware_power_max * kernel_accel_max;
+      const u64 device_processors = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE)     ? 1 : device_param->device_processors);
+      const u64 kernel_threads    = ((hashconfig->opts_type & OPTS_TYPE_THREAD_MULTI_DISABLE) ? 1 : kernel_threads_max);
+
+      const u64 kernel_power_max = device_processors * kernel_threads * kernel_accel_max;
+
+      // size_spilling
 
       const u64 size_spilling = kernel_power_max * local_size_bytes;
 
@@ -16246,21 +16266,16 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       // now check if all device-memory sizes which depend on the kernel_accel_max amplifier are within its boundaries
       // if not, decrease amplifier and try again
 
-      int memory_limit_hit = 0;
+      memory_limit_hit = 0;
 
       // sometimes device_available_mem and device_maxmem_alloc reported back from the opencl runtime are a bit inaccurate.
       // let's add some extra space just to be sure.
       // now depends on the kernel-accel value (where scrypt and similar benefits), but also hard minimum 64mb and maximum 1024mb limit
       // let's see if we still need this now that we have low-level API to report free memory
 
-      u64 EXTRA_SPACE = 4096; //(1024ULL * 1024ULL) * kernel_accel_max;
-
-      //EXTRA_SPACE = MAX (EXTRA_SPACE, ( 256ULL * 1024ULL * 1024ULL));
-      //EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL));
-
-      if ((size_pws   + EXTRA_SPACE) > device_param->device_maxmem_alloc) memory_limit_hit = 1;
-      if ((size_tmps  + EXTRA_SPACE) > device_param->device_maxmem_alloc) memory_limit_hit = 1;
-      if ((size_hooks + EXTRA_SPACE) > device_param->device_maxmem_alloc) memory_limit_hit = 1;
+      if (size_pws   > device_param->device_maxmem_alloc) memory_limit_hit = 1;
+      if (size_tmps  > device_param->device_maxmem_alloc) memory_limit_hit = 1;
+      if (size_hooks > device_param->device_maxmem_alloc) memory_limit_hit = 1;
 
       // work around, for some reason apple opencl can't have buffers larger 2^31
       // typically runs into trap 6
@@ -16338,14 +16353,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
         + size_kernel_params
         + size_spilling;
 
-      if ((size_total + EXTRA_SPACE) > device_param->device_available_mem) memory_limit_hit = 1;
-
-      if (memory_limit_hit == 1)
-      {
-        kernel_accel_max--;
-
-        continue;
-      }
+      if (size_total > device_param->device_available_mem) memory_limit_hit = 1;
 
       const u64 size_host_extra = (512 * 1024 * 1024);
 
@@ -16361,11 +16369,29 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
         + size_pws_base
         + size_host_extra;
 
-      if ((size_total_host + EXTRA_SPACE) > accel_limit_host) memory_limit_hit = 1;
+      if (size_total_host > accel_limit_host) memory_limit_hit = 1;
+
+      //printf ("%zu %zu %d %d\n", size_total, device_param->device_available_mem, kernel_accel_max, kernel_threads_max);
 
       if (memory_limit_hit == 1)
       {
-        kernel_accel_max--;
+        if (kernel_accel_max == kernel_accel_min)
+        {
+          if ((kernel_threads_max > kernel_threads_min) && (kernel_threads_max >= (device_param->kernel_preferred_wgs_multiple * 2)))
+          {
+            kernel_threads_max -= device_param->kernel_preferred_wgs_multiple;
+
+            kernel_accel_max = kernel_accel_max_sav;
+          }
+          else
+          {
+            break;
+          }
+        }
+        else
+        {
+          kernel_accel_max--;
+        }
 
         continue;
       }
@@ -16375,13 +16401,14 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       break;
     }
 
-    if (kernel_accel_max < kernel_accel_min)
+    if (memory_limit_hit == 1)
     {
       event_log_error (hashcat_ctx, "* Device #%u: Not enough allocatable device memory or free host memory for mapping.", device_id + 1);
 
       backend_memory_hit_warnings++;
 
       device_param->skipped_warning = true;
+
       continue;
     }
 
@@ -16392,7 +16419,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
     {
       while (kernel_accel_max > kernel_accel_min)
       {
-        const u64 kernel_power_max = hardware_power_max * kernel_accel_max;
+        const u64 kernel_power_max = device_param->device_processors * kernel_accel_max;
 
         if (kernel_power_max > hashes->salts_cnt)
         {
@@ -16405,6 +16432,12 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       }
     }
 
+    device_param->kernel_threads_min = kernel_threads_min;
+    device_param->kernel_threads_max = kernel_threads_max;
+
+    const u32 hardware_power_max = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE)     ? 1 : device_param->device_processors)
+                                 * ((hashconfig->opts_type & OPTS_TYPE_THREAD_MULTI_DISABLE) ? 1 : device_param->kernel_threads_max);
+
     device_param->kernel_accel_min = kernel_accel_min;
     device_param->kernel_accel_max = kernel_accel_max;
 
diff --git a/src/modules/module_00020.c b/src/modules/module_00020.c
index 3d514be55..f20a23d75 100644
--- a/src/modules/module_00020.c
+++ b/src/modules/module_00020.c
@@ -20,6 +20,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_RAW_HASH_SALTED;
 static const char *HASH_NAME      = "md5($salt.$pass)";
 static const u64   KERN_TYPE      = 20;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_PRECOMPUTE_INIT
                                   | OPTI_TYPE_EARLY_SKIP
                                   | OPTI_TYPE_NOT_ITERATED
diff --git a/src/modules/module_00021.c b/src/modules/module_00021.c
index a9072fb4e..ce19d37ab 100644
--- a/src/modules/module_00021.c
+++ b/src/modules/module_00021.c
@@ -20,6 +20,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_FORUM_SOFTWARE;
 static const char *HASH_NAME      = "osCommerce, xt:Commerce";
 static const u64   KERN_TYPE      = 20;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_PRECOMPUTE_INIT
                                   | OPTI_TYPE_EARLY_SKIP
                                   | OPTI_TYPE_NOT_ITERATED
diff --git a/src/modules/module_00022.c b/src/modules/module_00022.c
index 4252f2f6d..d5742c8f0 100644
--- a/src/modules/module_00022.c
+++ b/src/modules/module_00022.c
@@ -20,6 +20,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_OS;
 static const char *HASH_NAME      = "Juniper NetScreen/SSG (ScreenOS)";
 static const u64   KERN_TYPE      = 20;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_PRECOMPUTE_INIT
                                   | OPTI_TYPE_EARLY_SKIP
                                   | OPTI_TYPE_NOT_ITERATED
diff --git a/src/modules/module_00023.c b/src/modules/module_00023.c
index b50bf7a84..d6b2f281a 100644
--- a/src/modules/module_00023.c
+++ b/src/modules/module_00023.c
@@ -20,6 +20,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_IMS;
 static const char *HASH_NAME      = "Skype";
 static const u64   KERN_TYPE      = 20;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_PRECOMPUTE_INIT
                                   | OPTI_TYPE_EARLY_SKIP
                                   | OPTI_TYPE_NOT_ITERATED
diff --git a/src/modules/module_00024.c b/src/modules/module_00024.c
index a2c4d2e90..f3da3ba75 100644
--- a/src/modules/module_00024.c
+++ b/src/modules/module_00024.c
@@ -20,6 +20,7 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_EAS;
 static const char *HASH_NAME      = "SolarWinds Serv-U";
 static const u64   KERN_TYPE      = 20;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT
                                   | OPTI_TYPE_PRECOMPUTE_INIT
                                   | OPTI_TYPE_EARLY_SKIP
                                   | OPTI_TYPE_NOT_ITERATED
diff --git a/src/modules/module_01500.c b/src/modules/module_01500.c
index b66dddff1..286eda992 100644
--- a/src/modules/module_01500.c
+++ b/src/modules/module_01500.c
@@ -20,7 +20,8 @@ static const u32   DGST_SIZE      = DGST_SIZE_4_4; // originally DGST_SIZE_4_2
 static const u32   HASH_CATEGORY  = HASH_CATEGORY_OS;
 static const char *HASH_NAME      = "descrypt, DES (Unix), Traditional DES";
 static const u64   KERN_TYPE      = 1500;
-static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE;
+static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_REGISTER_LIMIT;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE
                                   | OPTS_TYPE_TM_KERNEL

From 03ea85dc688f42734528c3d735d84e4f803dd541 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=EC=97=84=ED=83=9C=EC=9A=A9?= <lg65438805@gmail.com>
Date: Tue, 1 Jul 2025 18:26:07 +0900
Subject: [PATCH 54/83] [FIX] Prevent NULL dereference in read_restore() when
 hcmalloc fails

---
 src/restore.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/restore.c b/src/restore.c
index f2a59f7c1..2ca95ba92 100644
--- a/src/restore.c
+++ b/src/restore.c
@@ -89,6 +89,14 @@ static int read_restore (hashcat_ctx_t *hashcat_ctx)
 
   char *buf = (char *) hcmalloc (HCBUFSIZ_LARGE);
 
+  if (buf == NULL)
+  {
+    event_log_error(hashcat_ctx, "hcmalloc: %s", strerror(errno));
+    hc_fclose(&fp);
+    hcfree(rd->argv);
+    return -1;
+  }
+
   for (u32 i = 0; i < rd->argc; i++)
   {
     if (hc_fgets (buf, HCBUFSIZ_LARGE - 1, &fp) == NULL)

From 4d4bb71d48b5c90bf36659dfa998c095bebfb5da Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Tue, 1 Jul 2025 16:02:40 +0200
Subject: [PATCH 55/83] Use a combination of device_processors == 1 and
 CL_DEVICE_HOST_UNIFIED_MEMORY == 1 to determine if the device is an APU. In
 that case, overwrite the device_processors count with 64 to correct the
 invalid value of 1 reported by the HIP and OpenCL runtimes. The value 64 is
 obtained from rocminfo. This might not be accurate for other APUs beyond the
 one used as a reference, but oversubscribing an APU does not negatively
 affect performance, so this should be acceptable. Also fixed a syntax error
 in inc_vendor.h related to funnel shift.

---
 OpenCL/inc_vendor.h |  6 ------
 src/backend.c       | 50 ++++++++++++++++++++++++++++++++-------------
 2 files changed, 36 insertions(+), 20 deletions(-)

diff --git a/OpenCL/inc_vendor.h b/OpenCL/inc_vendor.h
index a0df70d23..40414cbb2 100644
--- a/OpenCL/inc_vendor.h
+++ b/OpenCL/inc_vendor.h
@@ -218,15 +218,9 @@ using namespace metal;
 #define s3 w
 #endif
 
-// funnelshift is nv's bitalign starting from sm 70
-
-#ifdef IS_NV
 #if HAS_SHFW == 1
 #define USE_FUNNELSHIFT
 #endif
-#elif IS_HIP
-#define USE_FUNNELSHIFT
-#endif
 
 // some algorithms do not like this, eg 150, 1100, ...
 
diff --git a/src/backend.c b/src/backend.c
index a9318be79..8213f4ea5 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -6094,6 +6094,17 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
       device_param->device_processors = device_processors;
 
+      if ((device_param->device_processors == 1) && (device_param->device_host_unified_memory == 1))
+      {
+        // APUs return some weird numbers. These values seem more appropriate (from rocminfo)
+        //Compute Unit:            2
+        //SIMDs per CU:            2
+        //Wavefront Size:          32(0x20)
+        //Max Waves Per CU:        32(0x20)
+
+        device_param->device_processors = 2 * 32;
+      }
+
       // device_global_mem, device_maxmem_alloc, device_available_mem
 
       size_t bytes = 0;
@@ -6399,7 +6410,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
       device_param->has_lop3  = false;
       device_param->has_mov64 = false;
       device_param->has_prmt  = false;
-      device_param->has_shfw  = false;
+      device_param->has_shfw  = prop.arch.hasFunnelShift;
 
       // device_available_mem
 
@@ -7218,6 +7229,19 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         device_param->opencl_device_c_version = opencl_device_c_version;
 
+        // device_host_unified_memory
+
+        cl_bool device_host_unified_memory = false;
+
+        if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof (device_host_unified_memory), &device_host_unified_memory, NULL) == -1)
+        {
+          device_param->skipped = true;
+
+          continue;
+        }
+
+        device_param->device_host_unified_memory = (device_host_unified_memory == CL_TRUE) ? 1 : 0;
+
         // max_compute_units
 
         cl_uint device_processors = 0;
@@ -7231,6 +7255,17 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
         device_param->device_processors = device_processors;
 
+        if ((device_param->device_processors == 1) && (device_param->device_host_unified_memory == 1))
+        {
+          // APUs return some weird numbers. These values seem more appropriate (from rocminfo)
+          //Compute Unit:            2
+          //SIMDs per CU:            2
+          //Wavefront Size:          32(0x20)
+          //Max Waves Per CU:        32(0x20)
+
+          device_param->device_processors = 2 * 32;
+        }
+
         #if defined (__APPLE__)
         if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
         {
@@ -7258,19 +7293,6 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
         }
         #endif // __APPLE__
 
-        // device_host_unified_memory
-
-        cl_bool device_host_unified_memory = false;
-
-        if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof (device_host_unified_memory), &device_host_unified_memory, NULL) == -1)
-        {
-          device_param->skipped = true;
-
-          continue;
-        }
-
-        device_param->device_host_unified_memory = (device_host_unified_memory == CL_TRUE) ? 1 : 0;
-
         // device_global_mem
 
         cl_ulong device_global_mem = 0;

From 7c9c1af30f01eb63188d2c76b0ca4966544e1c5f Mon Sep 17 00:00:00 2001
From: Royce Williams <royce@techsolvency.com>
Date: Tue, 1 Jul 2025 18:21:02 -0800
Subject: [PATCH 56/83] fix type mismatch in msg

---
 src/terminal.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/terminal.c b/src/terminal.c
index 9cd362a9d..b76c64048 100644
--- a/src/terminal.c
+++ b/src/terminal.c
@@ -2292,13 +2292,13 @@ void status_display (hashcat_ctx_t *hashcat_ctx)
   if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
   {
     event_log_info (hashcat_ctx,
-      "Kernel.Feature...: Optimized Kernel (max length: %s)",
+      "Kernel.Feature...: Optimized Kernel (max length: %u)",
       hashconfig->pw_max);
   }
   else
   {
     event_log_info (hashcat_ctx,
-      "Kernel.Feature...: Pure Kernel (max length: %s)",
+      "Kernel.Feature...: Pure Kernel (max length: %u)",
       hashconfig->pw_max);
   }
 

From 76869e5099273d83f7b3cc23b6ac734a8c840495 Mon Sep 17 00:00:00 2001
From: Royce Williams <royce@techsolvency.com>
Date: Tue, 1 Jul 2025 18:31:20 -0800
Subject: [PATCH 57/83] skip preamble if --backend-info and --machine-readable

---
 src/terminal.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/terminal.c b/src/terminal.c
index 2976bc7c2..e929a6ab8 100644
--- a/src/terminal.c
+++ b/src/terminal.c
@@ -88,8 +88,11 @@ void welcome_screen (hashcat_ctx_t *hashcat_ctx, const char *version_tag)
   }
   else if (user_options->backend_info > 0)
   {
-    event_log_info (hashcat_ctx, "%s (%s) starting in backend information mode", PROGNAME, version_tag);
-    event_log_info (hashcat_ctx, NULL);
+    if (user_options->machine_readable == false)
+    {
+      event_log_info (hashcat_ctx, "%s (%s) starting in backend information mode", PROGNAME, version_tag);
+      event_log_info (hashcat_ctx, NULL);
+    }
   }
   else if (user_options->hash_mode_chgd == false)
   {

From 3c1649ccc8ddee97348c624fc0913682a77de692 Mon Sep 17 00:00:00 2001
From: Pelle Kuiters <p.barens@nfi.nl>
Date: Wed, 2 Jul 2025 10:47:00 +0200
Subject: [PATCH 58/83] GPU support for Argon2id for NVIDIA CUDA

---
 OpenCL/inc_common.cl       |  83 ++++++++
 OpenCL/inc_common.h        |   4 +
 OpenCL/inc_hash_argon2.cl  | 383 +++++++++++++++++++++++++++++++++++++
 OpenCL/inc_hash_argon2.h   |  84 ++++++++
 OpenCL/inc_platform.cl     |  37 +++-
 OpenCL/inc_platform.h      |   5 +-
 OpenCL/m34000-pure.cl      |  93 +++++++++
 src/autotune.c             |   2 +-
 src/backend.c              |  13 +-
 src/modules/module_34000.c | 363 +++++++++++++++++++++++++++++++++++
 10 files changed, 1055 insertions(+), 12 deletions(-)
 create mode 100644 OpenCL/inc_hash_argon2.cl
 create mode 100644 OpenCL/inc_hash_argon2.h
 create mode 100644 OpenCL/m34000-pure.cl
 create mode 100644 src/modules/module_34000.c

diff --git a/OpenCL/inc_common.cl b/OpenCL/inc_common.cl
index a23efd42b..1bb07b896 100644
--- a/OpenCL/inc_common.cl
+++ b/OpenCL/inc_common.cl
@@ -1946,6 +1946,19 @@ DECLSPEC u32 hc_lop_0x96_S (const u32 a, const u32 b, const u32 c)
 
 #endif
 
+/**
+ * arithmetic operations
+ */
+
+DECLSPEC u32 hc_umulhi (const u32 x, const u32 y) // upper 32 bits of the 64-bit product x * y
+{
+#if defined IS_CUDA
+  return __umulhi (x, y); // CUDA intrinsic
+#else
+  return h32_from_64_S ((u64) x * (u64) y); // widen both operands to 64 bit, keep the high word
+#endif
+}
+
 /**
  * pure scalar functions
  */
@@ -41979,6 +41992,76 @@ DECLSPEC void switch_buffer_by_offset_8x4_le_S (PRIVATE_AS u32 *w0, PRIVATE_AS u
       w0[1] = 0;
       w0[0] = 0;
       break;
+
+    case 16:
+      w7[3] = hc_byte_perm_S (w3[2], w3[3], selector);
+      w7[2] = hc_byte_perm_S (w3[1], w3[2], selector);
+      w7[1] = hc_byte_perm_S (w3[0], w3[1], selector);
+      w7[0] = hc_byte_perm_S (w2[3], w3[0], selector);
+      w6[3] = hc_byte_perm_S (w2[2], w2[3], selector);
+      w6[2] = hc_byte_perm_S (w2[1], w2[2], selector);
+      w6[1] = hc_byte_perm_S (w2[0], w2[1], selector);
+      w6[0] = hc_byte_perm_S (w1[3], w2[0], selector);
+      w5[3] = hc_byte_perm_S (w1[2], w1[3], selector);
+      w5[2] = hc_byte_perm_S (w1[1], w1[2], selector);
+      w5[1] = hc_byte_perm_S (w1[0], w1[1], selector);
+      w5[0] = hc_byte_perm_S (w0[3], w1[0], selector);
+      w4[3] = hc_byte_perm_S (w0[2], w0[3], selector);
+      w4[2] = hc_byte_perm_S (w0[1], w0[2], selector);
+      w4[1] = hc_byte_perm_S (w0[0], w0[1], selector);
+      w4[0] = hc_byte_perm_S (    0, w0[0], selector);
+      w3[3] = 0;
+      w3[2] = 0;
+      w3[1] = 0;
+      w3[0] = 0;
+      w2[3] = 0;
+      w2[2] = 0;
+      w2[1] = 0;
+      w2[0] = 0;
+      w1[3] = 0;
+      w1[2] = 0;
+      w1[1] = 0;
+      w1[0] = 0;
+      w0[3] = 0;
+      w0[2] = 0;
+      w0[1] = 0;
+      w0[0] = 0;
+      break;
+
+    case 17:
+      w7[3] = hc_byte_perm_S (w3[1], w3[2], selector);
+      w7[2] = hc_byte_perm_S (w3[0], w3[1], selector);
+      w7[1] = hc_byte_perm_S (w2[3], w3[0], selector);
+      w7[0] = hc_byte_perm_S (w2[2], w2[3], selector);
+      w6[3] = hc_byte_perm_S (w2[1], w2[2], selector);
+      w6[2] = hc_byte_perm_S (w2[0], w2[1], selector);
+      w6[1] = hc_byte_perm_S (w1[3], w2[0], selector);
+      w6[0] = hc_byte_perm_S (w1[2], w1[3], selector);
+      w5[3] = hc_byte_perm_S (w1[1], w1[2], selector);
+      w5[2] = hc_byte_perm_S (w1[0], w1[1], selector);
+      w5[1] = hc_byte_perm_S (w0[3], w1[0], selector);
+      w5[0] = hc_byte_perm_S (w0[2], w0[3], selector);
+      w4[3] = hc_byte_perm_S (w0[1], w0[2], selector);
+      w4[2] = hc_byte_perm_S (w0[0], w0[1], selector);
+      w4[1] = hc_byte_perm_S (    0, w0[0], selector);
+      w4[0] = 0;
+      w3[3] = 0;
+      w3[2] = 0;
+      w3[1] = 0;
+      w3[0] = 0;
+      w2[3] = 0;
+      w2[2] = 0;
+      w2[1] = 0;
+      w2[0] = 0;
+      w1[3] = 0;
+      w1[2] = 0;
+      w1[1] = 0;
+      w1[0] = 0;
+      w0[3] = 0;
+      w0[2] = 0;
+      w0[1] = 0;
+      w0[0] = 0;
+      break;
   }
   #endif
 }
diff --git a/OpenCL/inc_common.h b/OpenCL/inc_common.h
index 0b51fbd29..c24ecb524 100644
--- a/OpenCL/inc_common.h
+++ b/OpenCL/inc_common.h
@@ -284,6 +284,10 @@ DECLSPEC u32  hc_bfe_S          (const u32  a, const u32  b, const u32  c);
 DECLSPEC u32x hc_lop_0x96       (const u32x a, const u32x b, const u32x c);
 DECLSPEC u32  hc_lop_0x96_S     (const u32  a, const u32  b, const u32  c);
 
+// arithmetic operations
+
+DECLSPEC u32  hc_umulhi (const u32 x, const u32 y);
+
 // legacy common code
 
 DECLSPEC int ffz (const u32 v);
diff --git a/OpenCL/inc_hash_argon2.cl b/OpenCL/inc_hash_argon2.cl
new file mode 100644
index 000000000..a82531a2c
--- /dev/null
+++ b/OpenCL/inc_hash_argon2.cl
@@ -0,0 +1,383 @@
+/**
+ * Author......: Netherlands Forensic Institute
+ * License.....: MIT
+ *
+ * Warp code based on original work by Ondrej Mosnáček
+ */
+
+#include "inc_vendor.h"
+#include "inc_types.h"
+#include "inc_platform.h"
+#include "inc_common.h"
+#include "inc_hash_blake2b.h"
+#include "inc_hash_argon2.h"
+
+DECLSPEC void argon2_initial_block (const u32 *in, const u32 lane, const u32 blocknum, const u32 parallelism, GLOBAL_AS argon2_block_t *blocks)
+{
+  // Derives block `blocknum` of `lane` from the pre-hash `in` and stores it at blocks[(blocknum * parallelism) + lane].
+  blake2b_ctx_t ctx;
+  blake2b_init (&ctx);
+
+  u64 blake_buf[16] = { 0 };
+
+  blake_buf[0] = sizeof(argon2_block_t);
+
+  blake2b_update (&ctx, (u32 *) blake_buf, 4);
+  blake2b_update (&ctx, in, 64);
+
+  blake_buf[0] = hl32_to_64_S (lane, blocknum);
+
+  blake2b_update (&ctx, (u32 *) blake_buf, 8);
+
+  blake2b_final (&ctx);
+  // blocks is a GLOBAL_AS pointer, so the destination pointer has to keep that address space qualifier
+  GLOBAL_AS u64 *out = blocks[(blocknum * parallelism) + lane].values;
+
+  out[0] = ctx.h[0];
+  out[1] = ctx.h[1];
+  out[2] = ctx.h[2];
+  out[3] = ctx.h[3];
+  // 30 chained BLAKE2b rounds over the previous state, each contributing its first 4 words to the block
+  for (u32 off = 4; off < 124; off += 4)
+  {
+    for (u32 idx = 0; idx < 8; idx++) blake_buf[idx] = ctx.h[idx];
+
+    blake2b_init (&ctx);
+    blake2b_transform (ctx.h, blake_buf, 64, BLAKE2B_FINAL);
+
+    out[off + 0] = ctx.h[0];
+    out[off + 1] = ctx.h[1];
+    out[off + 2] = ctx.h[2];
+    out[off + 3] = ctx.h[3];
+  }
+  // the last round also contributes its upper 4 words, completing the 128-word block
+  out[124] = ctx.h[4];
+  out[125] = ctx.h[5];
+  out[126] = ctx.h[6];
+  out[127] = ctx.h[7];
+}
+
+DECLSPEC void argon2_initial_hash (GLOBAL_AS const pw_t *pw, GLOBAL_AS const salt_t *salt, const argon2_options_t *options, u64 *blockhash)
+{
+  blake2b_ctx_t ctx;
+  blake2b_init (&ctx);
+
+  u32 option_input[32] = { 0 };
+
+  option_input[0] = options->parallelism;
+  option_input[1] = options->digest_len;
+  option_input[2] = options->memory_usage_in_kib;
+  option_input[3] = options->iterations;
+  option_input[4] = options->version;
+  option_input[5] = options->type;
+
+  blake2b_update (&ctx, option_input, 24);
+
+  u32 len_input[32] = { 0 };
+
+  len_input[0] = pw->pw_len;
+
+  blake2b_update (&ctx, len_input, 4);
+  blake2b_update_global (&ctx, pw->i, pw->pw_len);
+
+  len_input[0] = salt->salt_len;
+
+  blake2b_update (&ctx, len_input, 4);
+  blake2b_update_global (&ctx, salt->salt_buf, salt->salt_len);
+
+  len_input[0] = 0;
+
+  blake2b_update (&ctx, len_input, 4); // secret (K)
+  blake2b_update (&ctx, len_input, 4); // associated data (X)
+
+  blake2b_final (&ctx);
+
+  for (u32 idx = 0; idx < 8; idx++) blockhash[idx] = ctx.h[idx];
+}
+
+DECLSPEC void argon2_init (GLOBAL_AS const pw_t *pw, GLOBAL_AS const salt_t *salt,
+                           const argon2_options_t *options, GLOBAL_AS argon2_block_t *out)
+{
+  u64 blockhash[16] = { 0 };
+
+  argon2_initial_hash (pw, salt, options, blockhash);
+
+  // Generate the first two blocks of each lane
+  for (u32 lane = 0; lane < options->parallelism; lane++)
+  {
+    argon2_initial_block ((u32 *) blockhash, lane, 0, options->parallelism, out);
+    argon2_initial_block ((u32 *) blockhash, lane, 1, options->parallelism, out);
+  }
+}
+
+DECLSPEC u64 trunc_mul (u64 x, u64 y) // 64-bit product of the low 32-bit halves of x and y
+{
+  const u32 xlo = (u32) x;
+  const u32 ylo = (u32) y;
+  return hl32_to_64_S (hc_umulhi (xlo, ylo), (u32) (xlo * ylo)); // upper 32 bits of the product, then its lower 32 bits
+}
+
+DECLSPEC inline u32 argon2_ref_address (const argon2_options_t *options, const argon2_pos_t *pos, u32 index, u64 pseudo_random)
+{
+  u32 ref_lane;
+  u32 ref_area;
+  u32 ref_index;
+
+  if ((pos->pass == 0) && (pos->slice == 0))
+  {
+    ref_lane = pos->lane;
+  }
+  else
+  {
+    ref_lane = h32_from_64_S (pseudo_random) % options->parallelism;
+  }
+
+  ref_area  = (pos->pass == 0) ? pos->slice : (ARGON2_SYNC_POINTS - 1);
+  ref_area *= options->segment_length;
+
+  if ((ref_lane == pos->lane) || (index == 0))
+  {
+      ref_area += (index - 1);
+  }
+
+  const u32 j1 = l32_from_64_S (pseudo_random);
+  ref_index = (ref_area - 1 - hc_umulhi (ref_area, hc_umulhi (j1, j1)));
+
+  if (pos->pass > 0)
+  {
+    ref_index += (pos->slice + 1) * options->segment_length;
+
+    if (ref_index >= options->lane_length)
+    {
+      ref_index -= options->lane_length;
+    }
+  }
+
+  return (options->parallelism * ref_index) + ref_lane;
+}
+
+DECLSPEC void swap_u64 (u64 *x, u64 *y)
+{
+  u64 tmp = *x;
+  *x = *y;
+  *y = tmp;
+}
+
+DECLSPEC void transpose_permute_block (u64 R[4], int thread)
+{
+  if (thread & 0x08)
+  {
+    swap_u64 (&R[0], &R[2]);
+    swap_u64 (&R[1], &R[3]);
+  }
+  if (thread & 0x04)
+  {
+    swap_u64 (&R[0], &R[1]);
+    swap_u64 (&R[2], &R[3]);
+  }
+}
+
+DECLSPEC int argon2_shift (int idx, int thread)
+{
+  const int delta = ((idx & 0x02) << 3) + (idx & 0x01);
+  return (thread & 0x0e) | (((thread & 0x11) + delta + 0x0e) & 0x11);
+}
+
+DECLSPEC void argon2_hash_block (u64 R[4], int thread)
+{
+  for (u32 idx = 1; idx < 4; idx++) R[idx] = __shfl_sync (FULL_MASK, R[idx], thread ^ (idx << 2));
+
+  transpose_permute_block (R, thread);
+
+  for (u32 idx = 1; idx < 4; idx++) R[idx] = __shfl_sync (FULL_MASK, R[idx], thread ^ (idx << 2));
+
+  ARGON2_G(R[0], R[1], R[2], R[3]);
+
+  for (u32 idx = 1; idx < 4; idx++) R[idx] = __shfl_sync (FULL_MASK, R[idx],  (thread & 0x1c) | ((thread + idx) & 0x03));
+
+  ARGON2_G(R[0], R[1], R[2], R[3]);
+
+  for (u32 idx = 1; idx < 4; idx++) R[idx] = __shfl_sync (FULL_MASK, R[idx], ((thread & 0x1c) | ((thread - idx) & 0x03)) ^ (idx << 2));
+
+  transpose_permute_block (R, thread);
+
+  for (u32 idx = 1; idx < 4; idx++) R[idx] = __shfl_sync (FULL_MASK, R[idx], thread ^ (idx << 2));
+
+  ARGON2_G(R[0], R[1], R[2], R[3]);
+
+  for (u32 idx = 1; idx < 4; idx++) R[idx] = __shfl_sync (FULL_MASK, R[idx], argon2_shift (idx, thread));
+
+  ARGON2_G(R[0], R[1], R[2], R[3]);
+
+  for (u32 idx = 1; idx < 4; idx++) R[idx] = __shfl_sync (FULL_MASK, R[idx], argon2_shift ((4 - idx), thread));
+}
+
+DECLSPEC void argon2_next_addresses (const argon2_options_t *options, const argon2_pos_t *pos, u32 *addresses, u32 start_index, u32 thread)
+{
+  u64 Z[4] = { 0 };
+  u64 tmp[4];
+
+  switch (thread)
+  {
+    case 0:  Z[0] = pos->pass;                   break;
+    case 1:  Z[0] = pos->lane;                   break;
+    case 2:  Z[0] = pos->slice;                  break;
+    case 3:  Z[0] = options->memory_block_count; break;
+    case 4:  Z[0] = options->iterations;         break;
+    case 5:  Z[0] = options->type;               break;
+    case 6:  Z[0] = (start_index / 128) + 1;     break;
+    default: Z[0] = 0;                           break;
+  }
+
+  tmp[0] = Z[0];
+
+  argon2_hash_block (Z, thread);
+
+  Z[0]  ^= tmp[0];
+
+  for (u32 idx = 0; idx < 4; idx++) tmp[idx] = Z[idx];
+
+  argon2_hash_block (Z, thread);
+
+  for (u32 idx = 0; idx < 4; idx++) Z[idx]  ^= tmp[idx];
+
+  for (u32 i = 0, index = (start_index + thread); i < 4; i++, index += THREADS_PER_LANE)
+  {
+    addresses[i] = argon2_ref_address (options, pos, index, Z[i]);
+  }
+}
+
+DECLSPEC u32 index_u32x4 (const u32 array[4], u32 index)
+{
+  // Selects array[index] for index 0..3 through a switch.
+  // The visible caller reduces index modulo ARGON2_SYNC_POINTS (4), so the
+  // default branch is a safety net that gives every path a return value.
+  switch (index)
+  {
+    case 0:  return array[0];
+    case 1:  return array[1];
+    case 2:  return array[2];
+    case 3:  return array[3];
+    default: return 0;
+  }
+}
+
+DECLSPEC GLOBAL_AS argon2_block_t *argon2_get_current_block (GLOBAL_AS argon2_block_t *blocks, const argon2_options_t *options, u32 lane, u32 index_in_lane, u64 R[4], u32 thread)
+{
+  // Loads this thread's 4 words of the previous block into R and returns the block at index_in_lane.
+  // Apply wrap-around to previous block index if the current block is the first block in the lane
+  const u32 prev_in_lane = (index_in_lane == 0) ? (options->lane_length - 1) : (index_in_lane - 1);
+
+  GLOBAL_AS const argon2_block_t *prev_block = &blocks[(prev_in_lane * options->parallelism) + lane];
+  for (u32 idx = 0; idx < 4; idx++) R[idx] = prev_block->values[(idx * THREADS_PER_LANE) + thread];
+
+  return &blocks[(index_in_lane * options->parallelism) + lane];
+}
+
+DECLSPEC void argon2_fill_subsegment (GLOBAL_AS argon2_block_t *blocks, const argon2_options_t *options, const argon2_pos_t *pos, bool indep_addr, const u32 addresses[4],
+                                      u32 start_index, u32 end_index, GLOBAL_AS argon2_block_t *cur_block, u64 R[4], u32 thread)
+{
+  for (u32 index = start_index; index < end_index; index++, cur_block += options->parallelism)
+  {
+    u32 ref_address;
+
+    if (indep_addr)
+    {
+      ref_address = index_u32x4 (addresses, (index / THREADS_PER_LANE) % ARGON2_SYNC_POINTS);
+      ref_address = __shfl_sync (FULL_MASK, ref_address, index);
+    }
+    else
+    {
+      ref_address = argon2_ref_address (options, pos, index, R[0]);
+      ref_address = __shfl_sync (FULL_MASK, ref_address, 0);
+    }
+
+    GLOBAL_AS const argon2_block_t *ref_block = &blocks[ref_address];
+
+    u64 tmp[4] = { 0 };
+
+    // First pass is overwrite, next passes are XOR with previous
+    if ((pos->pass > 0) && (options->version != ARGON2_VERSION_10))
+    {
+      for (u32 idx = 0; idx < 4; idx++) tmp[idx]  = cur_block->values[(idx * THREADS_PER_LANE) + thread];
+    }
+
+    for (u32 idx = 0; idx < 4; idx++) R[idx]   ^= ref_block->values[(idx * THREADS_PER_LANE) + thread];
+
+    for (u32 idx = 0; idx < 4; idx++) tmp[idx] ^= R[idx];
+
+    argon2_hash_block (R, thread);
+
+    for (u32 idx = 0; idx < 4; idx++) R[idx]   ^= tmp[idx];
+
+    for (u32 idx = 0; idx < 4; idx++) cur_block->values[(idx * THREADS_PER_LANE) + thread] = R[idx];
+  }
+}
+
+DECLSPEC void argon2_fill_segment (GLOBAL_AS argon2_block_t *blocks, const argon2_options_t *options, const argon2_pos_t *pos)
+{
+  const u32  thread       = get_local_id(0);
+  // thread selects this thread's words inside each block (values[(idx * THREADS_PER_LANE) + thread])
+  // We have already generated the first two blocks of each lane (for the first pass)
+  const u32 skip_blocks   = (pos->pass == 0) && (pos->slice == 0) ? 2 : 0;
+  const u32 index_in_lane = (pos->slice * options->segment_length) + skip_blocks;
+
+  u64 R[4];
+
+  GLOBAL_AS argon2_block_t *cur_block = argon2_get_current_block (blocks, options, pos->lane, index_in_lane, R, thread);
+  // TYPE_I always, TYPE_ID only in pass 0 / slices 0-1: addresses are precomputed per 128 blocks (indep_addr = true)
+  if ((options->type == TYPE_I) || ((options->type == TYPE_ID) && (pos->pass == 0) && (pos->slice <= 1)))
+  {
+    for (u32 block_index = 0; block_index < options->segment_length; block_index += 128)
+    {
+      const u32 start_index = (block_index == 0) ? skip_blocks : block_index;
+      const u32 end_index   = MIN(((start_index | 127) + 1), options->segment_length);
+
+      u32 addresses[4];
+
+      argon2_next_addresses (options, pos, addresses, block_index, thread);
+      argon2_fill_subsegment (blocks, options, pos, true, addresses, start_index, end_index, cur_block, R, thread);
+      // argon2_fill_subsegment received cur_block by value, so advance our copy past the blocks it wrote
+      cur_block += (end_index - start_index) * options->parallelism;
+    }
+  }
+  else
+  {
+    u32 addresses[4] = { 0 };
+    // with indep_addr = false the reference address is derived from R[0]; addresses is not read
+    argon2_fill_subsegment (blocks, options, pos, false, addresses, skip_blocks, options->segment_length, cur_block, R, thread);
+  }
+}
+
+DECLSPEC void argon2_final (GLOBAL_AS argon2_block_t *blocks, const argon2_options_t *options, u32 *out)
+{
+  // XORs the last block of every lane together, then hashes digest_len followed by that block with BLAKE2b into out.
+  const u32 lane_length = options->lane_length;
+  const u32 lanes = options->parallelism;
+
+  argon2_block_t final_block = { { 0 } };
+  for (u32 l = 0; l < lanes; l++)
+  {
+    for (u32 idx = 0; idx < 128; idx++) final_block.values[idx] ^= blocks[((lane_length - 1) * lanes) + l].values[idx];
+  }
+
+  u32 output_len [32] = {0};
+  output_len [0] = options->digest_len;
+
+  blake2b_ctx_t ctx;
+  blake2b_init (&ctx);
+
+  // Override default (0x40) value in BLAKE2b
+  ctx.h[0] ^= 0x40 ^ options->digest_len;
+
+  blake2b_update (&ctx, output_len, 4);
+  blake2b_update (&ctx, (u32 *) final_block.values, sizeof(final_block));
+
+  blake2b_final (&ctx);
+  // each 64-bit state word yields two 32-bit output words, low half first; out must hold digest_len bytes
+  for (u32 i = 0, idx = 0; i < (options->digest_len / 4); i += 2, idx += 1)
+  {
+    out [i + 0] = l32_from_64_S (ctx.h[idx]);
+    out [i + 1] = h32_from_64_S (ctx.h[idx]);
+  }
+}
diff --git a/OpenCL/inc_hash_argon2.h b/OpenCL/inc_hash_argon2.h
new file mode 100644
index 000000000..8980068cd
--- /dev/null
+++ b/OpenCL/inc_hash_argon2.h
@@ -0,0 +1,84 @@
+/**
+ * Author......: Netherlands Forensic Institute
+ * License.....: MIT
+ */
+
+#ifndef INC_HASH_ARGON2_H
+#define INC_HASH_ARGON2_H
+
+#define MIN(a,b) (((a) < (b)) ? (a) : (b))
+
+#define ARGON2_VERSION_10 0x10
+#define ARGON2_VERSION_13 0x13
+
+#define THREADS_PER_LANE 32
+#define FULL_MASK 0xffffffff
+
+#define BLAKE2B_OUTBYTES 64
+#define ARGON2_SYNC_POINTS 4
+#define ARGON2_ADDRESSES_IN_BLOCK 128
+
+#define TYPE_D  0
+#define TYPE_I  1
+#define TYPE_ID 2
+
+#define ARGON2_G(a,b,c,d)                \
+{                                        \
+  a = a + b + 2 * trunc_mul(a, b);       \
+  d = blake2b_rot32_S (d ^ a);           \
+  c = c + d + 2 * trunc_mul(c, d);       \
+  b = blake2b_rot24_S (b ^ c);           \
+  a = a + b + 2 * trunc_mul(a, b);       \
+  d = blake2b_rot16_S (d ^ a);           \
+  c = c + d + 2 * trunc_mul(c, d);       \
+  b = hc_rotr64_S (b ^ c, 63);           \
+}
+
+#define ARGON2_P()                       \
+{                                        \
+  ARGON2_G(v[0], v[4], v[8], v[12]);     \
+  ARGON2_G(v[1], v[5], v[9], v[13]);     \
+  ARGON2_G(v[2], v[6], v[10], v[14]);    \
+  ARGON2_G(v[3], v[7], v[11], v[15]);    \
+                                         \
+  ARGON2_G(v[0], v[5], v[10], v[15]);    \
+  ARGON2_G(v[1], v[6], v[11], v[12]);    \
+  ARGON2_G(v[2], v[7], v[8], v[13]);     \
+  ARGON2_G(v[3], v[4], v[9], v[14]);     \
+}
+
+typedef struct argon2_block
+{
+  u64 values[128];
+
+} argon2_block_t;
+
+typedef struct argon2_options
+{
+  u32 type;
+  u32 version;
+
+  u32 iterations;
+  u32 parallelism;
+  u32 memory_usage_in_kib;
+
+  u32 segment_length;
+  u32 lane_length;
+  u32 memory_block_count;
+  u32 digest_len;
+
+} argon2_options_t;
+
+typedef struct argon2_pos
+{
+  u32 pass;
+  u32 slice;
+  u32 lane;
+
+} argon2_pos_t;
+
+DECLSPEC void argon2_init (GLOBAL_AS const pw_t *pw, GLOBAL_AS const salt_t *salt, const argon2_options_t *options, GLOBAL_AS argon2_block_t *out);
+DECLSPEC void argon2_fill_segment (GLOBAL_AS argon2_block_t *blocks, const argon2_options_t *options, const argon2_pos_t *pos);
+DECLSPEC void argon2_final (GLOBAL_AS argon2_block_t *blocks, const argon2_options_t *options, u32 *out);
+
+#endif // INC_HASH_ARGON2_H
diff --git a/OpenCL/inc_platform.cl b/OpenCL/inc_platform.cl
index 9af10e9ba..36303296f 100644
--- a/OpenCL/inc_platform.cl
+++ b/OpenCL/inc_platform.cl
@@ -104,9 +104,17 @@ DECLSPEC u32 hc_atomic_or (GLOBAL_AS u32 *p, volatile const u32 val)
   return atomicOr (p, val);
 }
 
-DECLSPEC size_t get_group_id  (const u32 dimindx __attribute__((unused)))
+DECLSPEC size_t get_group_id (const u32 dimindx)
 {
-  return blockIdx.x;
+  // CUDA stand-in for the OpenCL built-in: dimindx 0 = x, 1 = y, 2 = z of blockIdx.
+  // Other indices return 0 (as OpenCL specifies) instead of falling off the end.
+  switch (dimindx)
+  {
+    case 0:  return blockIdx.x;
+    case 1:  return blockIdx.y;
+    case 2:  return blockIdx.z;
+    default: return 0;
+  }
 }
 
 DECLSPEC size_t get_global_id  (const u32 dimindx __attribute__((unused)))
@@ -114,15 +122,30 @@ DECLSPEC size_t get_global_id  (const u32 dimindx __attribute__((unused)))
   return (blockIdx.x * blockDim.x) + threadIdx.x;
 }
 
-DECLSPEC size_t get_local_id (const u32 dimindx __attribute__((unused)))
+DECLSPEC size_t get_local_id (const u32 dimindx)
 {
-  return threadIdx.x;
+  // CUDA stand-in for the OpenCL built-in: dimindx 0 = x, 1 = y, 2 = z of threadIdx.
+  // Other indices return 0 (as OpenCL specifies) instead of falling off the end.
+  switch (dimindx)
+  {
+    case 0:  return threadIdx.x;
+    case 1:  return threadIdx.y;
+    case 2:  return threadIdx.z;
+    default: return 0;
+  }
 }
 
-DECLSPEC size_t get_local_size (const u32 dimindx __attribute__((unused)))
+DECLSPEC size_t get_local_size (const u32 dimindx)
 {
-  // verify
-  return blockDim.x;
+  // CUDA stand-in for the OpenCL built-in: dimindx 0 = x, 1 = y, 2 = z of blockDim.
+  // Other indices return 1 (as OpenCL specifies) instead of falling off the end.
+  switch (dimindx)
+  {
+    case 0:  return blockDim.x;
+    case 1:  return blockDim.y;
+    case 2:  return blockDim.z;
+    default: return 1;
+  }
 }
 
 DECLSPEC u32x rotl32 (const u32x a, const int n)
diff --git a/OpenCL/inc_platform.h b/OpenCL/inc_platform.h
index cc6489afe..4e203ff18 100644
--- a/OpenCL/inc_platform.h
+++ b/OpenCL/inc_platform.h
@@ -27,8 +27,9 @@ DECLSPEC u32 hc_atomic_inc (volatile GLOBAL_AS u32 *p);
 DECLSPEC u32 hc_atomic_or  (volatile GLOBAL_AS u32 *p, volatile const u32 val);
 
 DECLSPEC size_t get_global_id   (const u32 dimindx __attribute__((unused)));
-DECLSPEC size_t get_local_id    (const u32 dimindx __attribute__((unused)));
-DECLSPEC size_t get_local_size  (const u32 dimindx __attribute__((unused)));
+DECLSPEC size_t get_group_id    (const u32 dimindx);
+DECLSPEC size_t get_local_id    (const u32 dimindx);
+DECLSPEC size_t get_local_size  (const u32 dimindx);
 
 DECLSPEC u32x rotl32   (const u32x a, const int n);
 DECLSPEC u32x rotr32   (const u32x a, const int n);
diff --git a/OpenCL/m34000-pure.cl b/OpenCL/m34000-pure.cl
new file mode 100644
index 000000000..8c17a523d
--- /dev/null
+++ b/OpenCL/m34000-pure.cl
@@ -0,0 +1,93 @@
+/**
+ * Author......: Netherlands Forensic Institute
+ * License.....: MIT
+ */
+
+#ifdef KERNEL_STATIC
+#include M2S(INCLUDE_PATH/inc_platform.cl)
+#include M2S(INCLUDE_PATH/inc_common.cl)
+#include M2S(INCLUDE_PATH/inc_hash_blake2b.cl)
+#include M2S(INCLUDE_PATH/inc_hash_argon2.cl)
+#endif
+
+#define COMPARE_S M2S(INCLUDE_PATH/inc_comp_single.cl)
+#define COMPARE_M M2S(INCLUDE_PATH/inc_comp_multi.cl)
+
+typedef struct argon2_tmp
+{
+#ifndef ARGON2_TMP_ELEM
+#define ARGON2_TMP_ELEM 1
+#endif
+
+  argon2_block_t blocks[ARGON2_TMP_ELEM];
+
+} argon2_tmp_t;
+
+KERNEL_FQ void m34000_init (_KERN_ATTR_TMPS_ESALT (argon2_tmp_t, argon2_options_t))
+{
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  const argon2_options_t options = esalt_bufs[DIGESTS_OFFSET_HOST];
+
+  argon2_init (&pws[gid], &salt_bufs[SALT_POS_HOST], &options, tmps[gid].blocks);
+}
+
+KERNEL_FQ void m34000_loop (_KERN_ATTR_TMPS_ESALT (argon2_tmp_t, argon2_options_t))
+{
+  const u64 gid = get_group_id (0);
+  const u64 lid = get_local_id (1);
+  const u64 lsz = get_local_size (1);
+  // gid (group index, dim 0) selects the tmps entry; lid/lsz (dim 1) spread the lanes over the work-group
+  if (gid >= GID_CNT) return;
+
+  argon2_options_t options = esalt_bufs[DIGESTS_OFFSET_HOST];
+  // the lane count used here is the ARGON2_PARALLELISM macro, replacing the value copied from the esalt
+  options.parallelism = ARGON2_PARALLELISM;
+
+  argon2_pos_t pos;
+  // LOOP_POS is split into pass and slice, with ARGON2_SYNC_POINTS slices per pass
+  pos.pass   = (LOOP_POS / ARGON2_SYNC_POINTS);
+  pos.slice  = (LOOP_POS % ARGON2_SYNC_POINTS);
+
+  for (u32 i = 0; i < LOOP_CNT; i++)
+  {
+    for (pos.lane = lid; pos.lane < options.parallelism; pos.lane += lsz)
+    {
+      argon2_fill_segment (tmps[gid].blocks, &options, &pos);
+    }
+    // NOTE(review): presumably a work-group barrier so every lane finishes the slice first - confirm
+    SYNC_THREADS ();
+
+    pos.slice++;
+
+    if (pos.slice == ARGON2_SYNC_POINTS)
+    {
+      pos.slice = 0;
+      pos.pass++;
+    }
+  }
+}
+
+KERNEL_FQ void m34000_comp ( _KERN_ATTR_TMPS_ESALT (argon2_tmp_t, argon2_options_t))
+{
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  u32 out[8];
+
+  const argon2_options_t options = esalt_bufs[DIGESTS_OFFSET_HOST];
+
+  argon2_final (tmps[gid].blocks, &options, out);
+
+  const u32 r0 = out[0];
+  const u32 r1 = out[1];
+  const u32 r2 = out[2];
+  const u32 r3 = out[3];
+
+  #define il_pos 0
+
+  #include COMPARE_M
+}
diff --git a/src/autotune.c b/src/autotune.c
index a599e65be..891baa472 100644
--- a/src/autotune.c
+++ b/src/autotune.c
@@ -107,7 +107,7 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
   const double target_msec = backend_ctx->target_msec;
 
-  const u32 kernel_accel_min = device_param->kernel_accel_min;
+  const u32 kernel_accel_min =  (hashconfig->opts_type & OPTS_TYPE_MAXIMUM_ACCEL) ? device_param->kernel_accel_max : device_param->kernel_accel_min;
   const u32 kernel_accel_max = device_param->kernel_accel_max;
 
   const u32 kernel_loops_min = device_param->kernel_loops_min;
diff --git a/src/backend.c b/src/backend.c
index 8213f4ea5..79b89dd04 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -2662,7 +2662,16 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
 
     if (hc_cuEventRecord (hashcat_ctx, device_param->cuda_event1, device_param->cuda_stream) == -1) return -1;
 
-    if (hc_cuLaunchKernel (hashcat_ctx, cuda_function, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, dynamic_shared_mem, device_param->cuda_stream, device_param->kernel_params, NULL) == -1) return -1;
+    if ((kern_run == KERN_RUN_2) && (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_DIMY_LOOP))
+    {
+      const u32 warp_size = device_param->kernel_preferred_wgs_multiple;
+
+      if (hc_cuLaunchKernel (hashcat_ctx, cuda_function, num, 1, 1, warp_size, blockDimY, 1, dynamic_shared_mem, device_param->cuda_stream, device_param->kernel_params, NULL) == -1) return -1;
+    }
+    else
+    {
+      if (hc_cuLaunchKernel (hashcat_ctx, cuda_function, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, dynamic_shared_mem, device_param->cuda_stream, device_param->kernel_params, NULL) == -1) return -1;
+    }
 
     if (hc_cuEventRecord (hashcat_ctx, device_param->cuda_event2, device_param->cuda_stream) == -1) return -1;
 
@@ -16225,7 +16234,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     // Still not 100% sure about the 64MiB here
 
-    const u64 size_device_extra = MAX ((64ULL * 1024 * 1024), size_device_extra1234);
+    const u64 size_device_extra = MAX ((1024 * 1024 * 1024), size_device_extra1234);
 
     // we will first decrease accel and when reached that limit, we will decrease threads
     // when we decrease limit this will restore accel_max
diff --git a/src/modules/module_34000.c b/src/modules/module_34000.c
new file mode 100644
index 000000000..64056b36f
--- /dev/null
+++ b/src/modules/module_34000.c
@@ -0,0 +1,363 @@
+/**
+ * Author......: Netherlands Forensic Institute
+ * License.....: MIT
+ */
+
+#include "common.h"
+#include "types.h"
+#include "modules.h"
+#include "bitops.h"
+#include "convert.h"
+#include "shared.h"
+
+#define ARGON2_SYNC_POINTS  4
+#define ARGON2_BLOCK_SIZE   1024
+
+static const u32   ATTACK_EXEC    = ATTACK_EXEC_OUTSIDE_KERNEL;
+static const u32   DGST_POS0      = 0;
+static const u32   DGST_POS1      = 1;
+static const u32   DGST_POS2      = 2;
+static const u32   DGST_POS3      = 3;
+static const u32   DGST_SIZE      = DGST_SIZE_4_8;
+static const u32   HASH_CATEGORY  = HASH_CATEGORY_GENERIC_KDF;
+static const char *HASH_NAME      = "Argon2ID";
+static const u64   KERN_TYPE      = 34000;
+static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_SLOW_HASH_DIMY_LOOP;
+static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
+                                  | OPTS_TYPE_PT_GENERATE_LE
+                                  | OPTS_TYPE_NATIVE_THREADS
+                                  | OPTS_TYPE_MP_MULTI_DISABLE
+                                  | OPTS_TYPE_MAXIMUM_ACCEL;
+static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
+static const char *ST_PASS        = "hashcat";
+static const char *ST_HASH        = "$argon2id$v=19$m=65536,t=3,p=1$FBMjI4RJBhIykCgol1KEJA$2ky5GAdhT1kH4kIgPN/oERE3Taiy43vNN70a3HpiKQU";
+
+u32         module_attack_exec    (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC;     }
+u32         module_dgst_pos0      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0;       }
+u32         module_dgst_pos1      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS1;       }
+u32         module_dgst_pos2      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS2;       }
+u32         module_dgst_pos3      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS3;       }
+u32         module_dgst_size      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_SIZE;       }
+u32         module_hash_category  (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_CATEGORY;   }
+const char *module_hash_name      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_NAME;       }
+u64         module_kern_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return KERN_TYPE;       }
+u32         module_opti_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTI_TYPE;       }
+u64         module_opts_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTS_TYPE;       }
+u32         module_salt_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return SALT_TYPE;       }
+const char *module_st_hash        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH;         }
+const char *module_st_pass        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS;         }
+
+typedef struct argon2_options
+{
+  u32 type;
+  u32 version;
+
+  u32 iterations;
+  u32 parallelism;
+  u32 memory_usage_in_kib;
+
+  u32 segment_length;
+  u32 lane_length;
+  u32 memory_block_count;
+
+  u32 digest_len;
+
+} argon2_options_t;
+
+static const char *SIGNATURE_ARGON2ID = "$argon2id$";
+
+u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u64 esalt_size = (const u64) sizeof (argon2_options_t);
+
+  return esalt_size;
+}
+
+u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_min = 1;
+
+  return kernel_threads_min;
+}
+
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 1;
+
+  return kernel_threads_max;
+}
+
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_min = 1;
+
+  return kernel_loops_min;
+}
+
+u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_max = 1;
+
+  return kernel_loops_max;
+}
+
+bool module_warmup_disable (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const bool warmup_disable = true;
+
+  return warmup_disable;
+}
+
+u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u64 tmp_size = 0; // we'll add some later
+
+  return tmp_size;
+}
+
+u64 module_extra_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes)
+{
+  argon2_options_t *options = (argon2_options_t *) hashes->esalts_buf;
+
+  const u32 memory_block_count = options[0].memory_block_count;
+
+  // we need to check that all hashes have the same memory requirement
+  for (u32 i = 1; i < hashes->salts_cnt; i++)
+  {
+    if (options[i].memory_block_count != memory_block_count) return (1ULL << 63) + i;
+  }
+
+  // now that we know they all have the same settings, we also need to check the self-test hash is different to what the user hash is using
+
+  if (user_options->self_test == true)
+  {
+    argon2_options_t *st_options = (argon2_options_t *) hashes->st_esalts_buf;
+
+    if (st_options[0].memory_block_count != memory_block_count) return (1ULL << 62);
+  }
+
+  const u64 tmp_size = ARGON2_BLOCK_SIZE * memory_block_count;
+
+  return tmp_size;
+}
+
+char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
+{
+  argon2_options_t *options = (argon2_options_t *) hashes->esalts_buf;
+
+  char *jit_build_options = NULL;
+
+  hc_asprintf (&jit_build_options, "-D ARGON2_PARALLELISM=%" PRIu32 " -D ARGON2_TMP_ELEM=%" PRIu32, options[0].parallelism, options[0].memory_block_count);
+
+  return jit_build_options;
+}
+
+int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
+{
+  u32 *digest = (u32 *) digest_buf;
+
+  argon2_options_t *options  = (argon2_options_t *) esalt_buf;
+
+  hc_token_t token;
+
+  memset (&token, 0, sizeof (hc_token_t));
+
+  token.token_cnt  = 7;
+
+  token.signatures_cnt    = 1;
+  token.signatures_buf[0] = SIGNATURE_ARGON2ID;
+
+  token.len[0]     = 10;
+  token.attr[0]    = TOKEN_ATTR_FIXED_LENGTH
+                   | TOKEN_ATTR_VERIFY_SIGNATURE;
+
+  // version
+  token.len[1]     = 4;
+  token.sep[1]     = '$';
+  token.attr[1]    = TOKEN_ATTR_FIXED_LENGTH;
+
+  // memoryUsageInKib
+  token.len_min[2] = 3;
+  token.len_max[2] = 12;
+  token.sep[2]     = ',';
+  token.attr[2]    = TOKEN_ATTR_VERIFY_LENGTH;
+
+  // iterations
+  token.len_min[3] = 3;
+  token.len_max[3] = 5;
+  token.sep[3]     = ',';
+  token.attr[3]    = TOKEN_ATTR_VERIFY_LENGTH;
+
+  // parallelism
+  token.len_min[4] = 3;
+  token.len_max[4] = 5;
+  token.sep[4]     = '$';
+  token.attr[4]    = TOKEN_ATTR_VERIFY_LENGTH;
+
+  // salt
+  token.len_min[5] = ((SALT_MIN * 8) / 6) + 0;
+  token.len_max[5] = ((SALT_MAX * 8) / 6) + 3;
+  token.sep[5]     = '$';
+  token.attr[5]    = TOKEN_ATTR_VERIFY_LENGTH
+                   | TOKEN_ATTR_VERIFY_BASE64A;
+
+  // target hash
+  token.len_min[6] = ((SALT_MIN * 8) / 6) + 0;
+  token.len_max[6] = ((SALT_MAX * 8) / 6) + 3;
+  token.sep[6]     = '$';
+  token.attr[6]    = TOKEN_ATTR_VERIFY_LENGTH
+                   | TOKEN_ATTR_VERIFY_BASE64A;
+
+  const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token);
+
+  if (rc_tokenizer != PARSER_OK) return (rc_tokenizer);
+
+  // argon2id config
+  const u8 *ver_pos = token.buf[1];
+  const u8 *mem_pos = token.buf[2];
+  const u8 *it_pos  = token.buf[3];
+  const u8 *par_pos = token.buf[4];
+
+  options->type                = 2; // Only support for Argon2id
+  options->version             = hc_strtoul ((const char *) ver_pos + 2, NULL, 10);
+  options->memory_usage_in_kib = hc_strtoul ((const char *) mem_pos + 2, NULL, 10);
+  options->iterations          = hc_strtoul ((const char *) it_pos  + 2, NULL, 10);
+  options->parallelism         = hc_strtoul ((const char *) par_pos + 2, NULL, 10);
+
+  if (options->version != 19 && options->version != 16) return (PARSER_HASH_VALUE);
+  if (options->memory_usage_in_kib < 1) return (PARSER_HASH_VALUE);
+  if (options->iterations < 1) return (PARSER_HASH_VALUE);
+  if (options->parallelism < 1 || options->parallelism > 32) return (PARSER_HASH_VALUE);
+
+  options->segment_length     = MAX (2, (options->memory_usage_in_kib / (ARGON2_SYNC_POINTS * options->parallelism)));
+  options->lane_length        = options->segment_length * ARGON2_SYNC_POINTS;
+  options->memory_block_count = options->lane_length * options->parallelism;
+
+  // salt
+  const int salt_len = token.len[5];
+  const u8 *salt_pos = token.buf[5];
+
+  salt->salt_iter = options->iterations * ARGON2_SYNC_POINTS;
+  salt->salt_dimy = options->parallelism;
+  salt->salt_len = base64_decode (base64_to_int, (const u8 *) salt_pos, salt_len, (u8 *) salt->salt_buf);
+
+  // digest/ target hash
+  const int digest_len = token.len[6];
+  const u8 *digest_pos = token.buf[6];
+
+  options->digest_len = base64_decode (base64_to_int, (const u8 *) digest_pos, digest_len, (u8 *) digest);
+
+  return (PARSER_OK);
+}
+
+int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const void *digest_buf, MAYBE_UNUSED const salt_t *salt, MAYBE_UNUSED const void *esalt_buf, MAYBE_UNUSED const void *hook_salt_buf, MAYBE_UNUSED const hashinfo_t *hash_info, char *line_buf, MAYBE_UNUSED const int line_size)
+{
+  u32 *digest = (u32 *) digest_buf;
+
+  argon2_options_t *options  = (argon2_options_t *) esalt_buf;
+
+  // salt
+  char base64_salt[512] = { 0 };
+  base64_encode (int_to_base64, (const u8 *) salt->salt_buf, salt->salt_len, (u8 *) base64_salt);
+
+  // digest
+  char base64_digest[512] = { 0 };
+  base64_encode (int_to_base64, (const u8 *) digest, options->digest_len, (u8 *) base64_digest);
+
+  // out
+  u8 *out_buf = (u8 *) line_buf;
+
+  const int out_len = snprintf ((char *) out_buf, line_size, "%sv=%d$m=%d,t=%d,p=%d$%s$%s",
+    SIGNATURE_ARGON2ID,
+    options->version,
+    options->memory_usage_in_kib,
+    options->iterations,
+    options->parallelism,
+    base64_salt,
+    base64_digest);
+
+  return out_len;
+}
+
+void module_init (module_ctx_t *module_ctx)
+{
+  module_ctx->module_context_size             = MODULE_CONTEXT_SIZE_CURRENT;
+  module_ctx->module_interface_version        = MODULE_INTERFACE_VERSION_CURRENT;
+
+  module_ctx->module_attack_exec              = module_attack_exec;
+  module_ctx->module_benchmark_esalt          = MODULE_DEFAULT;
+  module_ctx->module_benchmark_hook_salt      = MODULE_DEFAULT;
+  module_ctx->module_benchmark_mask           = MODULE_DEFAULT;
+  module_ctx->module_benchmark_charset        = MODULE_DEFAULT;
+  module_ctx->module_benchmark_salt           = MODULE_DEFAULT;
+  module_ctx->module_bridge_name              = MODULE_DEFAULT;
+  module_ctx->module_bridge_type              = MODULE_DEFAULT;
+  module_ctx->module_build_plain_postprocess  = MODULE_DEFAULT;
+  module_ctx->module_deep_comp_kernel         = MODULE_DEFAULT;
+  module_ctx->module_deprecated_notice        = MODULE_DEFAULT;
+  module_ctx->module_dgst_pos0                = module_dgst_pos0;
+  module_ctx->module_dgst_pos1                = module_dgst_pos1;
+  module_ctx->module_dgst_pos2                = module_dgst_pos2;
+  module_ctx->module_dgst_pos3                = module_dgst_pos3;
+  module_ctx->module_dgst_size                = module_dgst_size;
+  module_ctx->module_dictstat_disable         = MODULE_DEFAULT;
+  module_ctx->module_esalt_size               = module_esalt_size;
+  module_ctx->module_extra_buffer_size        = MODULE_DEFAULT;
+  module_ctx->module_extra_tmp_size           = module_extra_tmp_size;
+  module_ctx->module_extra_tuningdb_block     = MODULE_DEFAULT;
+  module_ctx->module_forced_outfile_format    = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_count        = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_parse        = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_save         = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_postprocess  = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_potfile      = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_zero_hash    = MODULE_DEFAULT;
+  module_ctx->module_hash_decode              = module_hash_decode;
+  module_ctx->module_hash_encode_status       = MODULE_DEFAULT;
+  module_ctx->module_hash_encode_potfile      = MODULE_DEFAULT;
+  module_ctx->module_hash_encode              = module_hash_encode;
+  module_ctx->module_hash_init_selftest       = MODULE_DEFAULT;
+  module_ctx->module_hash_mode                = MODULE_DEFAULT;
+  module_ctx->module_hash_category            = module_hash_category;
+  module_ctx->module_hash_name                = module_hash_name;
+  module_ctx->module_hashes_count_min         = MODULE_DEFAULT;
+  module_ctx->module_hashes_count_max         = MODULE_DEFAULT;
+  module_ctx->module_hlfmt_disable            = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_size    = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_init    = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_term    = MODULE_DEFAULT;
+  module_ctx->module_hook12                   = MODULE_DEFAULT;
+  module_ctx->module_hook23                   = MODULE_DEFAULT;
+  module_ctx->module_hook_salt_size           = MODULE_DEFAULT;
+  module_ctx->module_hook_size                = MODULE_DEFAULT;
+  module_ctx->module_jit_build_options        = module_jit_build_options;
+  module_ctx->module_jit_cache_disable        = MODULE_DEFAULT;
+  module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
+  module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_min       = module_kernel_threads_min;
+  module_ctx->module_kern_type                = module_kern_type;
+  module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
+  module_ctx->module_opti_type                = module_opti_type;
+  module_ctx->module_opts_type                = module_opts_type;
+  module_ctx->module_outfile_check_disable    = MODULE_DEFAULT;
+  module_ctx->module_outfile_check_nocomp     = MODULE_DEFAULT;
+  module_ctx->module_potfile_custom_check     = MODULE_DEFAULT;
+  module_ctx->module_potfile_disable          = MODULE_DEFAULT;
+  module_ctx->module_potfile_keep_all_hashes  = MODULE_DEFAULT;
+  module_ctx->module_pwdump_column            = MODULE_DEFAULT;
+  module_ctx->module_pw_max                   = MODULE_DEFAULT;
+  module_ctx->module_pw_min                   = MODULE_DEFAULT;
+  module_ctx->module_salt_max                 = MODULE_DEFAULT;
+  module_ctx->module_salt_min                 = MODULE_DEFAULT;
+  module_ctx->module_salt_type                = module_salt_type;
+  module_ctx->module_separator                = MODULE_DEFAULT;
+  module_ctx->module_st_hash                  = module_st_hash;
+  module_ctx->module_st_pass                  = module_st_pass;
+  module_ctx->module_tmp_size                 = module_tmp_size;
+  module_ctx->module_unstable_warning         = MODULE_DEFAULT;
+  module_ctx->module_warmup_disable           = module_warmup_disable;
+}

From d9918d7e44020a526aa7bdc0e364f9c159a0c324 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Wed, 2 Jul 2025 11:02:57 +0200
Subject: [PATCH 59/83] Add Argon2 support for OpenCL and HIP
 =====================================

This patch modifies the existing Argon2 plugin, which was initially
designed to work only with CUDA. Supporting OpenCL and HIP required
broader architectural changes.

1. The tmps[] structure no longer holds the "large buffer". This
buffer stored the scratch areas for all password candidates in one
chunk. But we do not need to hold scratch areas for all candidates
simultaneously. All we need to do is hold chunks large enough
per password.

To simplify logic, the buffer is not divided by password count, but
divided by four, which fits within the "1/4 global memory" limit on
some OpenCL runtimes.

Hashcat already had logic to support this, but the buffer needed to be
moved to a different buffer type. It has now been relocated from the
"tmp buffer" to the "extra tmp buffer", following the same strategy
used in newer SCRYPT plugins.

This improves handling across several subcomponents:

  - Hashcat backend divides into four asymmetric buffers, hence the
    name "4-buffer strategy"
  - If the candidate count isn't divisible by 4, leftover candidates are
    assigned to the first (and possibly second and third) buffer
  - No code in the plugin is required, as this was designed for exactly
    such cases where future algorithms require a lot of memory
  - Plugin was rewritten to report the size needed in
    module_extra_tmp_size(), which triggers the "4-buffer" strategy
  - The split is not even, but each part is large enough to hold
    a multiple of a full scratch buffer for a password
  - The kernel code in m34000_init/loop/comp now uses a code block
    that finds its buffer by doing "group_id % 4"
  - Prevents the need to over-allocate memory to avoid OOB access
  - The original "tmps buffer" now holds a small dummy state buffer

2. Replaced warp shuffle instruction

The instruction __shfl_sync() is not available in runtimes
other than CUDA. Some have alternatives, some do not.

To prevent branching per backend runtime, the new general macro
hc__shfl_sync() replaces all calls to __shfl_sync().
This allows us to implement runtime-specific solutions and
take effect at compile time to prevent regressions.

- CUDA:
  We simply map to the original __shfl_sync()

- HIP:
  We map to shfl(), a built-in intrinsic. This instruction doesn't
  support masks like __shfl_sync() does, but masks are not needed
  in Argon2 anyway. It requires an additional parameter, the wavefront
  size. This is natively 64, but we hardcode this to 32 so it aligns
  with NVIDIA's warp size.

- OpenCL:
  - AMD: We have access to the instruction __builtin_amdgcn_ds_bpermute().
    This instruction only supports 32-bit integers, requiring us to
    pack and unpack the 64-bit values manually
  - NVIDIA: We use inline assembly with "shfl.sync.idx.b32". Same as
    with AMD, we need to pack and unpack 32-bit integers. The 64-bit
    support in CUDA is just overloaded and internally does the same thing.
  - Others: We use a shared memory pool and combine it with a barrier.
    This LOCAL_VK pool must be sized at compile time and transported to
    the Argon2 code in "inc_hash_argon2.cl". This required changing all
    function declarations that use shuffles slightly.

Unlock full threading for init and comp kernels
===============================================

This is implemented using a new flag:
  OPTS_TYPE_THREAD_MULTI_DISABLE

Behavior is similar to:
  OPTS_TYPE_MP_MULTI_DISABLE

It simply disables the multiplier normally applied to password batch size.

Note, however, that this change completely decouples this effect from
the threads actually spawned on the compute device. If the thread count
is not set to 1 in the plugin, the thread count will be autotuned.

In the case of Argon2, we hard-code it to 32 instead, which also changes
how "warp size" was used in the original implementation, and which is not
compatible with HIP and/or OpenCL. However, we need to maintain this thread
size to utilize warp shuffle and its alternatives in other runtimes.

Benefits:

  - Enables full threading for init and comp kernels (1667 H/s to 1722 H/s)
  - Allows future algorithms to enable parallel processing of single
    password candidates, if supported

Plugin changes:

  - Removed the "hack" where thread count = 1 disabled the multiplier
  - Removed per-device warp count detection code and struct changes
  - Removed warp handling and "num_elements / thread_count" division in
    the run_kernel() function

Simplified autotune logic for Argon2
====================================

The goal is to calculate the maximum number of password candidates that
can run in parallel, constrained only by device memory.

  - Removed all code related to Argon2 from autotune
  - Implemented in "module_extra_tuningdb_block()" (like SCRYPT)
  - We create a tuningdb entry at runtime!
  - Still allows override via tuningdb or CLI
  - Considers register spilling (read at startup)
  - Prevents global-to-host memory swap performance issues

Add Argon2I and Argon2D support
===============================

The kernel prepared by NFI already had support for the different Argon2
types. No change was needed.

To support the other Argon2 types, the tokenizer had to be improved to
support a variety of different signatures in the same hash-mode.

Bugfixes
========

- Fixed missing entries in "switch_buffer_by_offset_8x4_le_S()"
- Fixed benchmark hash misdetection for scrypt. This was due to
  outdated logic used in scrypt to detect whether the plugin was
  called from a benchmark session or a regular one
- Fixed a bug in "module_hash_encode()" where Base64 padding '=' was
  retained
- Fixed missing "GLOBAL_AS" / "PRIVATE_AS" casts for OpenCL
- Fixed compiler warnings (e.g., "index_u32x4()", "get_group_id()")
  by adding return values
- Fixed a bug in token.len_max[6], which allowed decoding
  256 bytes of data into a 16-byte buffer (digest)

Other improvements
==================

- Added unit test module for automated testing
- Added support to the tokenizer to allow multiple signatures.
  Leave out TOKEN_ATTR_FIXED_LENGTH to enable this in your plugins
- Updated "hc_umulhi()": "__umulhi()" also exists for HIP
- Renamed "gid" to "bid" when using "get_group_id()" for clarity
- Removed "#ifdef IS_CUDA" as all backends are now supported
- Removed deprecated "OPTS_TYPE_MAXIMUM_ACCEL" attribute

Performance note
================

For testing, I used the self-test hash configured according to the
RFC 9106 recommendation: m=65536, t=3, p=1.

In my benchmarks, the AMD RX 7900 XTX achieved 1401 H/s using the same
hash that was used to test NVIDIA's RTX 4090. The RTX 4090 reached
1722 H/s, making it faster in absolute terms. However, at the time of
writing, it is more than three times as expensive as the 7900 XTX.

It's also worth noting that an older NVIDIA GTX 1080 Ti still reached
565 H/s with the same test vector, and may be found at significantly
lower cost.

Across all tested Argon2 configurations, the performance gap between
the RX 7900 XTX and the RTX 4090 remained proportionally consistent,
indicating a clear linear scaling relationship between the two GPUs.
---
 OpenCL/inc_common.cl         | 513 ++++++++++++++++++++++-
 OpenCL/inc_hash_argon2.cl    | 769 ++++++++++++++++++-----------------
 OpenCL/inc_hash_argon2.h     | 231 +++++++----
 OpenCL/inc_platform.cl       |  53 ++-
 OpenCL/inc_platform.h        |   3 +-
 OpenCL/m34000-pure.cl        |  72 +++-
 src/autotune.c               |   2 +-
 src/backend.c                |  13 +-
 src/modules/module_34000.c   | 198 ++++++---
 src/shared.c                 |  55 ++-
 tools/test_modules/m34000.pm |  79 ++++
 11 files changed, 1423 insertions(+), 565 deletions(-)
 create mode 100644 tools/test_modules/m34000.pm

diff --git a/OpenCL/inc_common.cl b/OpenCL/inc_common.cl
index 1bb07b896..b13dc4d5f 100644
--- a/OpenCL/inc_common.cl
+++ b/OpenCL/inc_common.cl
@@ -1952,11 +1952,11 @@ DECLSPEC u32 hc_lop_0x96_S (const u32 a, const u32 b, const u32 c)
 
 DECLSPEC u32 hc_umulhi (const u32 x, const u32 y)
 {
-#if defined IS_CUDA
+  #if defined IS_CUDA || defined IS_HIP
   return __umulhi (x, y);
-#else
+  #else
   return h32_from_64_S ((u64) x * (u64) y);
-#endif
+  #endif
 }
 
 /**
@@ -41418,7 +41418,6 @@ DECLSPEC void switch_buffer_by_offset_8x4_le_S (PRIVATE_AS u32 *w0, PRIVATE_AS u
   #endif
 
   #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
-
   const int offset_mod_4 = offset & 3;
 
   const int offset_minus_4 = 4 - offset_mod_4;
@@ -42026,6 +42025,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le_S (PRIVATE_AS u32 *w0, PRIVATE_AS u
       w0[2] = 0;
       w0[1] = 0;
       w0[0] = 0;
+
       break;
 
     case 17:
@@ -42061,6 +42061,511 @@ DECLSPEC void switch_buffer_by_offset_8x4_le_S (PRIVATE_AS u32 *w0, PRIVATE_AS u
       w0[2] = 0;
       w0[1] = 0;
       w0[0] = 0;
+
+      break;
+
+    case 18:
+      w7[3] = hc_byte_perm_S (w3[0], w3[1], selector);
+      w7[2] = hc_byte_perm_S (w2[3], w3[0], selector);
+      w7[1] = hc_byte_perm_S (w2[2], w2[3], selector);
+      w7[0] = hc_byte_perm_S (w2[1], w2[2], selector);
+      w6[3] = hc_byte_perm_S (w2[0], w2[1], selector);
+      w6[2] = hc_byte_perm_S (w1[3], w2[0], selector);
+      w6[1] = hc_byte_perm_S (w1[2], w1[3], selector);
+      w6[0] = hc_byte_perm_S (w1[1], w1[2], selector);
+      w5[3] = hc_byte_perm_S (w1[0], w1[1], selector);
+      w5[2] = hc_byte_perm_S (w0[3], w1[0], selector);
+      w5[1] = hc_byte_perm_S (w0[2], w0[3], selector);
+      w5[0] = hc_byte_perm_S (w0[1], w0[2], selector);
+      w4[3] = hc_byte_perm_S (w0[0], w0[1], selector);
+      w4[2] = hc_byte_perm_S (    0, w0[0], selector);
+      w4[1] = 0;
+      w4[0] = 0;
+      w3[3] = 0;
+      w3[2] = 0;
+      w3[1] = 0;
+      w3[0] = 0;
+      w2[3] = 0;
+      w2[2] = 0;
+      w2[1] = 0;
+      w2[0] = 0;
+      w1[3] = 0;
+      w1[2] = 0;
+      w1[1] = 0;
+      w1[0] = 0;
+      w0[3] = 0;
+      w0[2] = 0;
+      w0[1] = 0;
+      w0[0] = 0;
+
+      break;
+
+    case 19:
+      w7[3] = hc_byte_perm_S (w2[3], w3[0], selector);
+      w7[2] = hc_byte_perm_S (w2[2], w2[3], selector);
+      w7[1] = hc_byte_perm_S (w2[1], w2[2], selector);
+      w7[0] = hc_byte_perm_S (w2[0], w2[1], selector);
+      w6[3] = hc_byte_perm_S (w1[3], w2[0], selector);
+      w6[2] = hc_byte_perm_S (w1[2], w1[3], selector);
+      w6[1] = hc_byte_perm_S (w1[1], w1[2], selector);
+      w6[0] = hc_byte_perm_S (w1[0], w1[1], selector);
+      w5[3] = hc_byte_perm_S (w0[3], w1[0], selector);
+      w5[2] = hc_byte_perm_S (w0[2], w0[3], selector);
+      w5[1] = hc_byte_perm_S (w0[1], w0[2], selector);
+      w5[0] = hc_byte_perm_S (w0[0], w0[1], selector);
+      w4[3] = hc_byte_perm_S (    0, w0[0], selector);
+      w4[2] = 0;
+      w4[1] = 0;
+      w4[0] = 0;
+      w3[3] = 0;
+      w3[2] = 0;
+      w3[1] = 0;
+      w3[0] = 0;
+      w2[3] = 0;
+      w2[2] = 0;
+      w2[1] = 0;
+      w2[0] = 0;
+      w1[3] = 0;
+      w1[2] = 0;
+      w1[1] = 0;
+      w1[0] = 0;
+      w0[3] = 0;
+      w0[2] = 0;
+      w0[1] = 0;
+      w0[0] = 0;
+
+      break;
+
+    case 20:
+      w7[3] = hc_byte_perm_S (w2[2], w2[3], selector);
+      w7[2] = hc_byte_perm_S (w2[1], w2[2], selector);
+      w7[1] = hc_byte_perm_S (w2[0], w2[1], selector);
+      w7[0] = hc_byte_perm_S (w1[3], w2[0], selector);
+      w6[3] = hc_byte_perm_S (w1[2], w1[3], selector);
+      w6[2] = hc_byte_perm_S (w1[1], w1[2], selector);
+      w6[1] = hc_byte_perm_S (w1[0], w1[1], selector);
+      w6[0] = hc_byte_perm_S (w0[3], w1[0], selector);
+      w5[3] = hc_byte_perm_S (w0[2], w0[3], selector);
+      w5[2] = hc_byte_perm_S (w0[1], w0[2], selector);
+      w5[1] = hc_byte_perm_S (w0[0], w0[1], selector);
+      w5[0] = hc_byte_perm_S (    0, w0[0], selector);
+      w4[3] = 0;
+      w4[2] = 0;
+      w4[1] = 0;
+      w4[0] = 0;
+      w3[3] = 0;
+      w3[2] = 0;
+      w3[1] = 0;
+      w3[0] = 0;
+      w2[3] = 0;
+      w2[2] = 0;
+      w2[1] = 0;
+      w2[0] = 0;
+      w1[3] = 0;
+      w1[2] = 0;
+      w1[1] = 0;
+      w1[0] = 0;
+      w0[3] = 0;
+      w0[2] = 0;
+      w0[1] = 0;
+      w0[0] = 0;
+
+      break;
+
+    case 21:
+      w7[3] = hc_byte_perm_S (w2[1], w2[2], selector);
+      w7[2] = hc_byte_perm_S (w2[0], w2[1], selector);
+      w7[1] = hc_byte_perm_S (w1[3], w2[0], selector);
+      w7[0] = hc_byte_perm_S (w1[2], w1[3], selector);
+      w6[3] = hc_byte_perm_S (w1[1], w1[2], selector);
+      w6[2] = hc_byte_perm_S (w1[0], w1[1], selector);
+      w6[1] = hc_byte_perm_S (w0[3], w1[0], selector);
+      w6[0] = hc_byte_perm_S (w0[2], w0[3], selector);
+      w5[3] = hc_byte_perm_S (w0[1], w0[2], selector);
+      w5[2] = hc_byte_perm_S (w0[0], w0[1], selector);
+      w5[1] = hc_byte_perm_S (    0, w0[0], selector);
+      w5[0] = 0;
+      w4[3] = 0;
+      w4[2] = 0;
+      w4[1] = 0;
+      w4[0] = 0;
+      w3[3] = 0;
+      w3[2] = 0;
+      w3[1] = 0;
+      w3[0] = 0;
+      w2[3] = 0;
+      w2[2] = 0;
+      w2[1] = 0;
+      w2[0] = 0;
+      w1[3] = 0;
+      w1[2] = 0;
+      w1[1] = 0;
+      w1[0] = 0;
+      w0[3] = 0;
+      w0[2] = 0;
+      w0[1] = 0;
+      w0[0] = 0;
+
+      break;
+
+    case 22:
+      w7[3] = hc_byte_perm_S (w2[0], w2[1], selector);
+      w7[2] = hc_byte_perm_S (w1[3], w2[0], selector);
+      w7[1] = hc_byte_perm_S (w1[2], w1[3], selector);
+      w7[0] = hc_byte_perm_S (w1[1], w1[2], selector);
+      w6[3] = hc_byte_perm_S (w1[0], w1[1], selector);
+      w6[2] = hc_byte_perm_S (w0[3], w1[0], selector);
+      w6[1] = hc_byte_perm_S (w0[2], w0[3], selector);
+      w6[0] = hc_byte_perm_S (w0[1], w0[2], selector);
+      w5[3] = hc_byte_perm_S (w0[0], w0[1], selector);
+      w5[2] = hc_byte_perm_S (    0, w0[0], selector);
+      w5[1] = 0;
+      w5[0] = 0;
+      w4[3] = 0;
+      w4[2] = 0;
+      w4[1] = 0;
+      w4[0] = 0;
+      w3[3] = 0;
+      w3[2] = 0;
+      w3[1] = 0;
+      w3[0] = 0;
+      w2[3] = 0;
+      w2[2] = 0;
+      w2[1] = 0;
+      w2[0] = 0;
+      w1[3] = 0;
+      w1[2] = 0;
+      w1[1] = 0;
+      w1[0] = 0;
+      w0[3] = 0;
+      w0[2] = 0;
+      w0[1] = 0;
+      w0[0] = 0;
+
+      break;
+
+    case 23:
+      w7[3] = hc_byte_perm_S (w1[3], w2[0], selector);
+      w7[2] = hc_byte_perm_S (w1[2], w1[3], selector);
+      w7[1] = hc_byte_perm_S (w1[1], w1[2], selector);
+      w7[0] = hc_byte_perm_S (w1[0], w1[1], selector);
+      w6[3] = hc_byte_perm_S (w0[3], w1[0], selector);
+      w6[2] = hc_byte_perm_S (w0[2], w0[3], selector);
+      w6[1] = hc_byte_perm_S (w0[1], w0[2], selector);
+      w6[0] = hc_byte_perm_S (w0[0], w0[1], selector);
+      w5[3] = hc_byte_perm_S (    0, w0[0], selector);
+      w5[2] = 0;
+      w5[1] = 0;
+      w5[0] = 0;
+      w4[3] = 0;
+      w4[2] = 0;
+      w4[1] = 0;
+      w4[0] = 0;
+      w3[3] = 0;
+      w3[2] = 0;
+      w3[1] = 0;
+      w3[0] = 0;
+      w2[3] = 0;
+      w2[2] = 0;
+      w2[1] = 0;
+      w2[0] = 0;
+      w1[3] = 0;
+      w1[2] = 0;
+      w1[1] = 0;
+      w1[0] = 0;
+      w0[3] = 0;
+      w0[2] = 0;
+      w0[1] = 0;
+      w0[0] = 0;
+
+      break;
+
+    case 24:
+      w7[3] = hc_byte_perm_S (w1[2], w1[3], selector);
+      w7[2] = hc_byte_perm_S (w1[1], w1[2], selector);
+      w7[1] = hc_byte_perm_S (w1[0], w1[1], selector);
+      w7[0] = hc_byte_perm_S (w0[3], w1[0], selector);
+      w6[3] = hc_byte_perm_S (w0[2], w0[3], selector);
+      w6[2] = hc_byte_perm_S (w0[1], w0[2], selector);
+      w6[1] = hc_byte_perm_S (w0[0], w0[1], selector);
+      w6[0] = hc_byte_perm_S (    0, w0[0], selector);
+      w5[3] = 0;
+      w5[2] = 0;
+      w5[1] = 0;
+      w5[0] = 0;
+      w4[3] = 0;
+      w4[2] = 0;
+      w4[1] = 0;
+      w4[0] = 0;
+      w3[3] = 0;
+      w3[2] = 0;
+      w3[1] = 0;
+      w3[0] = 0;
+      w2[3] = 0;
+      w2[2] = 0;
+      w2[1] = 0;
+      w2[0] = 0;
+      w1[3] = 0;
+      w1[2] = 0;
+      w1[1] = 0;
+      w1[0] = 0;
+      w0[3] = 0;
+      w0[2] = 0;
+      w0[1] = 0;
+      w0[0] = 0;
+
+      break;
+
+    case 25:
+      w7[3] = hc_byte_perm_S (w1[1], w1[2], selector);
+      w7[2] = hc_byte_perm_S (w1[0], w1[1], selector);
+      w7[1] = hc_byte_perm_S (w0[3], w1[0], selector);
+      w7[0] = hc_byte_perm_S (w0[2], w0[3], selector);
+      w6[3] = hc_byte_perm_S (w0[1], w0[2], selector);
+      w6[2] = hc_byte_perm_S (w0[0], w0[1], selector);
+      w6[1] = hc_byte_perm_S (    0, w0[0], selector);
+      w6[0] = 0;
+      w5[3] = 0;
+      w5[2] = 0;
+      w5[1] = 0;
+      w5[0] = 0;
+      w4[3] = 0;
+      w4[2] = 0;
+      w4[1] = 0;
+      w4[0] = 0;
+      w3[3] = 0;
+      w3[2] = 0;
+      w3[1] = 0;
+      w3[0] = 0;
+      w2[3] = 0;
+      w2[2] = 0;
+      w2[1] = 0;
+      w2[0] = 0;
+      w1[3] = 0;
+      w1[2] = 0;
+      w1[1] = 0;
+      w1[0] = 0;
+      w0[3] = 0;
+      w0[2] = 0;
+      w0[1] = 0;
+      w0[0] = 0;
+
+      break;
+
+    case 26:
+      w7[3] = hc_byte_perm_S (w1[0], w1[1], selector);
+      w7[2] = hc_byte_perm_S (w0[3], w1[0], selector);
+      w7[1] = hc_byte_perm_S (w0[2], w0[3], selector);
+      w7[0] = hc_byte_perm_S (w0[1], w0[2], selector);
+      w6[3] = hc_byte_perm_S (w0[0], w0[1], selector);
+      w6[2] = hc_byte_perm_S (    0, w0[0], selector);
+      w6[1] = 0;
+      w6[0] = 0;
+      w5[3] = 0;
+      w5[2] = 0;
+      w5[1] = 0;
+      w5[0] = 0;
+      w4[3] = 0;
+      w4[2] = 0;
+      w4[1] = 0;
+      w4[0] = 0;
+      w3[3] = 0;
+      w3[2] = 0;
+      w3[1] = 0;
+      w3[0] = 0;
+      w2[3] = 0;
+      w2[2] = 0;
+      w2[1] = 0;
+      w2[0] = 0;
+      w1[3] = 0;
+      w1[2] = 0;
+      w1[1] = 0;
+      w1[0] = 0;
+      w0[3] = 0;
+      w0[2] = 0;
+      w0[1] = 0;
+      w0[0] = 0;
+
+      break;
+
+    case 27:
+      w7[3] = hc_byte_perm_S (w0[3], w1[0], selector);
+      w7[2] = hc_byte_perm_S (w0[2], w0[3], selector);
+      w7[1] = hc_byte_perm_S (w0[1], w0[2], selector);
+      w7[0] = hc_byte_perm_S (w0[0], w0[1], selector);
+      w6[3] = hc_byte_perm_S (    0, w0[0], selector);
+      w6[2] = 0;
+      w6[1] = 0;
+      w6[0] = 0;
+      w5[3] = 0;
+      w5[2] = 0;
+      w5[1] = 0;
+      w5[0] = 0;
+      w4[3] = 0;
+      w4[2] = 0;
+      w4[1] = 0;
+      w4[0] = 0;
+      w3[3] = 0;
+      w3[2] = 0;
+      w3[1] = 0;
+      w3[0] = 0;
+      w2[3] = 0;
+      w2[2] = 0;
+      w2[1] = 0;
+      w2[0] = 0;
+      w1[3] = 0;
+      w1[2] = 0;
+      w1[1] = 0;
+      w1[0] = 0;
+      w0[3] = 0;
+      w0[2] = 0;
+      w0[1] = 0;
+      w0[0] = 0;
+
+      break;
+
+    case 28:
+      w7[3] = hc_byte_perm_S (w0[2], w0[3], selector);
+      w7[2] = hc_byte_perm_S (w0[1], w0[2], selector);
+      w7[1] = hc_byte_perm_S (w0[0], w0[1], selector);
+      w7[0] = hc_byte_perm_S (    0, w0[0], selector);
+      w6[3] = 0;
+      w6[2] = 0;
+      w6[1] = 0;
+      w6[0] = 0;
+      w5[3] = 0;
+      w5[2] = 0;
+      w5[1] = 0;
+      w5[0] = 0;
+      w4[3] = 0;
+      w4[2] = 0;
+      w4[1] = 0;
+      w4[0] = 0;
+      w3[3] = 0;
+      w3[2] = 0;
+      w3[1] = 0;
+      w3[0] = 0;
+      w2[3] = 0;
+      w2[2] = 0;
+      w2[1] = 0;
+      w2[0] = 0;
+      w1[3] = 0;
+      w1[2] = 0;
+      w1[1] = 0;
+      w1[0] = 0;
+      w0[3] = 0;
+      w0[2] = 0;
+      w0[1] = 0;
+      w0[0] = 0;
+
+      break;
+
+    case 29:
+      w7[3] = hc_byte_perm_S (w0[1], w0[2], selector);
+      w7[2] = hc_byte_perm_S (w0[0], w0[1], selector);
+      w7[1] = hc_byte_perm_S (    0, w0[0], selector);
+      w7[0] = 0;
+      w6[3] = 0;
+      w6[2] = 0;
+      w6[1] = 0;
+      w6[0] = 0;
+      w5[3] = 0;
+      w5[2] = 0;
+      w5[1] = 0;
+      w5[0] = 0;
+      w4[3] = 0;
+      w4[2] = 0;
+      w4[1] = 0;
+      w4[0] = 0;
+      w3[3] = 0;
+      w3[2] = 0;
+      w3[1] = 0;
+      w3[0] = 0;
+      w2[3] = 0;
+      w2[2] = 0;
+      w2[1] = 0;
+      w2[0] = 0;
+      w1[3] = 0;
+      w1[2] = 0;
+      w1[1] = 0;
+      w1[0] = 0;
+      w0[3] = 0;
+      w0[2] = 0;
+      w0[1] = 0;
+      w0[0] = 0;
+
+      break;
+
+    case 30:
+      w7[3] = hc_byte_perm_S (w0[0], w0[1], selector);
+      w7[2] = hc_byte_perm_S (    0, w0[0], selector);
+      w7[1] = 0;
+      w7[0] = 0;
+      w6[3] = 0;
+      w6[2] = 0;
+      w6[1] = 0;
+      w6[0] = 0;
+      w5[3] = 0;
+      w5[2] = 0;
+      w5[1] = 0;
+      w5[0] = 0;
+      w4[3] = 0;
+      w4[2] = 0;
+      w4[1] = 0;
+      w4[0] = 0;
+      w3[3] = 0;
+      w3[2] = 0;
+      w3[1] = 0;
+      w3[0] = 0;
+      w2[3] = 0;
+      w2[2] = 0;
+      w2[1] = 0;
+      w2[0] = 0;
+      w1[3] = 0;
+      w1[2] = 0;
+      w1[1] = 0;
+      w1[0] = 0;
+      w0[3] = 0;
+      w0[2] = 0;
+      w0[1] = 0;
+      w0[0] = 0;
+
+      break;
+
+    case 31:
+      w7[3] = hc_byte_perm_S (    0, w0[0], selector);
+      w7[2] = 0;
+      w7[1] = 0;
+      w7[0] = 0;
+      w6[3] = 0;
+      w6[2] = 0;
+      w6[1] = 0;
+      w6[0] = 0;
+      w5[3] = 0;
+      w5[2] = 0;
+      w5[1] = 0;
+      w5[0] = 0;
+      w4[3] = 0;
+      w4[2] = 0;
+      w4[1] = 0;
+      w4[0] = 0;
+      w3[3] = 0;
+      w3[2] = 0;
+      w3[1] = 0;
+      w3[0] = 0;
+      w2[3] = 0;
+      w2[2] = 0;
+      w2[1] = 0;
+      w2[0] = 0;
+      w1[3] = 0;
+      w1[2] = 0;
+      w1[1] = 0;
+      w1[0] = 0;
+      w0[3] = 0;
+      w0[2] = 0;
+      w0[1] = 0;
+      w0[0] = 0;
+
       break;
   }
   #endif
diff --git a/OpenCL/inc_hash_argon2.cl b/OpenCL/inc_hash_argon2.cl
index a82531a2c..dc575f8da 100644
--- a/OpenCL/inc_hash_argon2.cl
+++ b/OpenCL/inc_hash_argon2.cl
@@ -1,383 +1,386 @@
-/**
- * Author......: Netherlands Forensic Institute
- * License.....: MIT
- *
- * Warp code based on original work by Ondrej Mosnáček
- */
-
-#include "inc_vendor.h"
-#include "inc_types.h"
-#include "inc_platform.h"
-#include "inc_common.h"
-#include "inc_hash_blake2b.h"
-#include "inc_hash_argon2.h"
-
-DECLSPEC void argon2_initial_block (const u32 *in, const u32 lane, const u32 blocknum, const u32 parallelism, GLOBAL_AS argon2_block_t *blocks)
-{
-  blake2b_ctx_t ctx;
-
-  blake2b_init (&ctx);
-
-  u64 blake_buf[16] = { 0 };
-
-  blake_buf[0] = sizeof(argon2_block_t);
-
-  blake2b_update (&ctx, (u32 *) blake_buf, 4);
-  blake2b_update (&ctx, in, 64);
-
-  blake_buf[0] = hl32_to_64 (lane, blocknum);
-
-  blake2b_update (&ctx, (u32 *) blake_buf, 8);
-
-  blake2b_final (&ctx);
-
-  u64 *out = blocks[(blocknum * parallelism) + lane].values;
-
-  out[0] = ctx.h[0];
-  out[1] = ctx.h[1];
-  out[2] = ctx.h[2];
-  out[3] = ctx.h[3];
-
-  for (u32 off = 4; off < 124; off += 4)
-  {
-    for (u32 idx = 0; idx < 8; idx++) blake_buf[idx] = ctx.h[idx];
-
-    blake2b_init (&ctx);
-    blake2b_transform (ctx.h, blake_buf, 64, BLAKE2B_FINAL);
-
-    out[off + 0] = ctx.h[0];
-    out[off + 1] = ctx.h[1];
-    out[off + 2] = ctx.h[2];
-    out[off + 3] = ctx.h[3];
-  }
-
-  out[124] = ctx.h[4];
-  out[125] = ctx.h[5];
-  out[126] = ctx.h[6];
-  out[127] = ctx.h[7];
-}
-
-DECLSPEC void argon2_initial_hash (GLOBAL_AS const pw_t *pw, GLOBAL_AS const salt_t *salt, const argon2_options_t *options, u64 *blockhash)
-{
-  blake2b_ctx_t ctx;
-  blake2b_init (&ctx);
-
-  u32 option_input[32] = { 0 };
-
-  option_input[0] = options->parallelism;
-  option_input[1] = options->digest_len;
-  option_input[2] = options->memory_usage_in_kib;
-  option_input[3] = options->iterations;
-  option_input[4] = options->version;
-  option_input[5] = options->type;
-
-  blake2b_update (&ctx, option_input, 24);
-
-  u32 len_input[32] = { 0 };
-
-  len_input[0] = pw->pw_len;
-
-  blake2b_update (&ctx, len_input, 4);
-  blake2b_update_global (&ctx, pw->i, pw->pw_len);
-
-  len_input[0] = salt->salt_len;
-
-  blake2b_update (&ctx, len_input, 4);
-  blake2b_update_global (&ctx, salt->salt_buf, salt->salt_len);
-
-  len_input[0] = 0;
-
-  blake2b_update (&ctx, len_input, 4); // secret (K)
-  blake2b_update (&ctx, len_input, 4); // associated data (X)
-
-  blake2b_final (&ctx);
-
-  for (u32 idx = 0; idx < 8; idx++) blockhash[idx] = ctx.h[idx];
-}
-
-DECLSPEC void argon2_init (GLOBAL_AS const pw_t *pw, GLOBAL_AS const salt_t *salt,
-                           const argon2_options_t *options, GLOBAL_AS argon2_block_t *out)
-{
-  u64 blockhash[16] = { 0 };
-
-  argon2_initial_hash (pw, salt, options, blockhash);
-
-  // Generate the first two blocks of each lane
-  for (u32 lane = 0; lane < options->parallelism; lane++)
-  {
-    argon2_initial_block ((u32 *) blockhash, lane, 0, options->parallelism, out);
-    argon2_initial_block ((u32 *) blockhash, lane, 1, options->parallelism, out);
-  }
-}
-
-DECLSPEC u64 trunc_mul (u64 x, u64 y)
-{
-  const u32 xlo = (u32) x;
-  const u32 ylo = (u32) y;
-  return hl32_to_64_S (hc_umulhi (xlo, ylo), (u32) (xlo * ylo));
-}
-
-DECLSPEC inline u32 argon2_ref_address (const argon2_options_t *options, const argon2_pos_t *pos, u32 index, u64 pseudo_random)
-{
-  u32 ref_lane;
-  u32 ref_area;
-  u32 ref_index;
-
-  if ((pos->pass == 0) && (pos->slice == 0))
-  {
-    ref_lane = pos->lane;
-  }
-  else
-  {
-    ref_lane = h32_from_64_S (pseudo_random) % options->parallelism;
-  }
-
-  ref_area  = (pos->pass == 0) ? pos->slice : (ARGON2_SYNC_POINTS - 1);
-  ref_area *= options->segment_length;
-
-  if ((ref_lane == pos->lane) || (index == 0))
-  {
-      ref_area += (index - 1);
-  }
-
-  const u32 j1 = l32_from_64_S (pseudo_random);
-  ref_index = (ref_area - 1 - hc_umulhi (ref_area, hc_umulhi (j1, j1)));
-
-  if (pos->pass > 0)
-  {
-    ref_index += (pos->slice + 1) * options->segment_length;
-
-    if (ref_index >= options->lane_length)
-    {
-      ref_index -= options->lane_length;
-    }
-  }
-
-  return (options->parallelism * ref_index) + ref_lane;
-}
-
-DECLSPEC void swap_u64 (u64 *x, u64 *y)
-{
-  u64 tmp = *x;
-  *x = *y;
-  *y = tmp;
-}
-
-DECLSPEC void transpose_permute_block (u64 R[4], int thread)
-{
-  if (thread & 0x08)
-  {
-    swap_u64 (&R[0], &R[2]);
-    swap_u64 (&R[1], &R[3]);
-  }
-  if (thread & 0x04)
-  {
-    swap_u64 (&R[0], &R[1]);
-    swap_u64 (&R[2], &R[3]);
-  }
-}
-
-DECLSPEC int argon2_shift (int idx, int thread)
-{
-  const int delta = ((idx & 0x02) << 3) + (idx & 0x01);
-  return (thread & 0x0e) | (((thread & 0x11) + delta + 0x0e) & 0x11);
-}
-
-DECLSPEC void argon2_hash_block (u64 R[4], int thread)
-{
-  for (u32 idx = 1; idx < 4; idx++) R[idx] = __shfl_sync (FULL_MASK, R[idx], thread ^ (idx << 2));
-
-  transpose_permute_block (R, thread);
-
-  for (u32 idx = 1; idx < 4; idx++) R[idx] = __shfl_sync (FULL_MASK, R[idx], thread ^ (idx << 2));
-
-  ARGON2_G(R[0], R[1], R[2], R[3]);
-
-  for (u32 idx = 1; idx < 4; idx++) R[idx] = __shfl_sync (FULL_MASK, R[idx],  (thread & 0x1c) | ((thread + idx) & 0x03));
-
-  ARGON2_G(R[0], R[1], R[2], R[3]);
-
-  for (u32 idx = 1; idx < 4; idx++) R[idx] = __shfl_sync (FULL_MASK, R[idx], ((thread & 0x1c) | ((thread - idx) & 0x03)) ^ (idx << 2));
-
-  transpose_permute_block (R, thread);
-
-  for (u32 idx = 1; idx < 4; idx++) R[idx] = __shfl_sync (FULL_MASK, R[idx], thread ^ (idx << 2));
-
-  ARGON2_G(R[0], R[1], R[2], R[3]);
-
-  for (u32 idx = 1; idx < 4; idx++) R[idx] = __shfl_sync (FULL_MASK, R[idx], argon2_shift (idx, thread));
-
-  ARGON2_G(R[0], R[1], R[2], R[3]);
-
-  for (u32 idx = 1; idx < 4; idx++) R[idx] = __shfl_sync (FULL_MASK, R[idx], argon2_shift ((4 - idx), thread));
-}
-
-DECLSPEC void argon2_next_addresses (const argon2_options_t *options, const argon2_pos_t *pos, u32 *addresses, u32 start_index, u32 thread)
-{
-  u64 Z[4] = { 0 };
-  u64 tmp[4];
-
-  switch (thread)
-  {
-    case 0:  Z[0] = pos->pass;                   break;
-    case 1:  Z[0] = pos->lane;                   break;
-    case 2:  Z[0] = pos->slice;                  break;
-    case 3:  Z[0] = options->memory_block_count; break;
-    case 4:  Z[0] = options->iterations;         break;
-    case 5:  Z[0] = options->type;               break;
-    case 6:  Z[0] = (start_index / 128) + 1;     break;
-    default: Z[0] = 0;                           break;
-  }
-
-  tmp[0] = Z[0];
-
-  argon2_hash_block (Z, thread);
-
-  Z[0]  ^= tmp[0];
-
-  for (u32 idx = 0; idx < 4; idx++) tmp[idx] = Z[idx];
-
-  argon2_hash_block (Z, thread);
-
-  for (u32 idx = 0; idx < 4; idx++) Z[idx]  ^= tmp[idx];
-
-  for (u32 i = 0, index = (start_index + thread); i < 4; i++, index += THREADS_PER_LANE)
-  {
-    addresses[i] = argon2_ref_address (options, pos, index, Z[i]);
-  }
-}
-
-DECLSPEC u32 index_u32x4 (const u32 array[4], u32 index)
-{
-  switch (index)
-  {
-    case 0:
-      return array[0];
-    case 1:
-      return array[1];
-    case 2:
-      return array[2];
-    case 3:
-      return array[3];
-  }
-}
-
-DECLSPEC GLOBAL_AS argon2_block_t *argon2_get_current_block (GLOBAL_AS argon2_block_t *blocks, const argon2_options_t *options, u32 lane, u32 index_in_lane, u64 R[4], u32 thread)
-{
-  // Apply wrap-around to previous block index if the current block is the first block in the lane
-  const u32 prev_in_lane = (index_in_lane == 0) ? (options->lane_length - 1) : (index_in_lane - 1);
-
-  argon2_block_t *prev_block = &blocks[(prev_in_lane * options->parallelism) + lane];
-
-  for (u32 idx = 0; idx < 4; idx++) R[idx] = prev_block->values[(idx * THREADS_PER_LANE) + thread];
-
-  return &blocks[(index_in_lane * options->parallelism) + lane];
-}
-
-DECLSPEC void argon2_fill_subsegment (GLOBAL_AS argon2_block_t *blocks, const argon2_options_t *options, const argon2_pos_t *pos, bool indep_addr, const u32 addresses[4],
-                                      u32 start_index, u32 end_index, GLOBAL_AS argon2_block_t *cur_block, u64 R[4], u32 thread)
-{
-  for (u32 index = start_index; index < end_index; index++, cur_block += options->parallelism)
-  {
-    u32 ref_address;
-
-    if (indep_addr)
-    {
-      ref_address = index_u32x4 (addresses, (index / THREADS_PER_LANE) % ARGON2_SYNC_POINTS);
-      ref_address = __shfl_sync (FULL_MASK, ref_address, index);
-    }
-    else
-    {
-      ref_address = argon2_ref_address (options, pos, index, R[0]);
-      ref_address = __shfl_sync (FULL_MASK, ref_address, 0);
-    }
-
-    GLOBAL_AS const argon2_block_t *ref_block = &blocks[ref_address];
-
-    u64 tmp[4] = { 0 };
-
-    // First pass is overwrite, next passes are XOR with previous
-    if ((pos->pass > 0) && (options->version != ARGON2_VERSION_10))
-    {
-      for (u32 idx = 0; idx < 4; idx++) tmp[idx]  = cur_block->values[(idx * THREADS_PER_LANE) + thread];
-    }
-
-    for (u32 idx = 0; idx < 4; idx++) R[idx]   ^= ref_block->values[(idx * THREADS_PER_LANE) + thread];
-
-    for (u32 idx = 0; idx < 4; idx++) tmp[idx] ^= R[idx];
-
-    argon2_hash_block (R, thread);
-
-    for (u32 idx = 0; idx < 4; idx++) R[idx]   ^= tmp[idx];
-
-    for (u32 idx = 0; idx < 4; idx++) cur_block->values[(idx * THREADS_PER_LANE) + thread] = R[idx];
-  }
-}
-
-DECLSPEC void argon2_fill_segment (GLOBAL_AS argon2_block_t *blocks, const argon2_options_t *options, const argon2_pos_t *pos)
-{
-  const u32  thread       = get_local_id(0);
-
-  // We have already generated the first two blocks of each lane (for the first pass)
-  const u32 skip_blocks   = (pos->pass == 0) && (pos->slice == 0) ? 2 : 0;
-  const u32 index_in_lane = (pos->slice * options->segment_length) + skip_blocks;
-
-  u64 R[4];
-
-  GLOBAL_AS argon2_block_t *cur_block = argon2_get_current_block (blocks, options, pos->lane, index_in_lane, R, thread);
-
-  if ((options->type == TYPE_I) || ((options->type == TYPE_ID) && (pos->pass == 0) && (pos->slice <= 1)))
-  {
-    for (u32 block_index = 0; block_index < options->segment_length; block_index += 128)
-    {
-      const u32 start_index = (block_index == 0) ? skip_blocks : block_index;
-      const u32 end_index   = MIN(((start_index | 127) + 1), options->segment_length);
-
-      u32 addresses[4];
-
-      argon2_next_addresses (options, pos, addresses, block_index, thread);
-      argon2_fill_subsegment (blocks, options, pos, true, addresses, start_index, end_index, cur_block, R, thread);
-
-      cur_block += (end_index - start_index) * options->parallelism;
-    }
-  }
-  else
-  {
-    u32 addresses[4] = { 0 };
-
-    argon2_fill_subsegment (blocks, options, pos, false, addresses, skip_blocks, options->segment_length, cur_block, R, thread);
-  }
-}
-
-DECLSPEC void argon2_final (GLOBAL_AS argon2_block_t *blocks, const argon2_options_t *options, u32 *out)
-{
-  const u32 lane_length = options->lane_length;
-  const u32 lanes = options->parallelism;
-
-  argon2_block_t final_block = { };
-
-  for (u32 l = 0; l < lanes; l++)
-  {
-    for (u32 idx = 0; idx < 128; idx++) final_block.values[idx] ^= blocks[((lane_length - 1) * lanes) + l].values[idx];
-  }
-
-  u32 output_len [32] = {0};
-  output_len [0] = options->digest_len;
-
-  blake2b_ctx_t ctx;
-  blake2b_init (&ctx);
-
-  // Override default (0x40) value in BLAKE2b
-  ctx.h[0] ^= 0x40 ^ options->digest_len; 
-
-  blake2b_update (&ctx, output_len, 4);
-  blake2b_update (&ctx, (u32 *) final_block.values, sizeof(final_block));
-
-  blake2b_final (&ctx);
-
-  for (int i = 0, idx = 0; i < (options->digest_len / 4); i += 2, idx += 1)
-  {
-    out [i + 0] = l32_from_64_S (ctx.h[idx]);
-    out [i + 1] = h32_from_64_S (ctx.h[idx]);
-  }
-}
+/**
+ * Author......: Netherlands Forensic Institute
+ * License.....: MIT
+ *
+ * Warp code based on original work by Ondrej Mosnáček
+ */
+
+#include "inc_vendor.h"
+#include "inc_types.h"
+#include "inc_platform.h"
+#include "inc_common.h"
+#include "inc_hash_blake2b.h"
+#include "inc_hash_argon2.h"
+
+DECLSPEC void argon2_initial_block (const u32 *in, const u32 lane, const u32 blocknum, const u32 parallelism, GLOBAL_AS argon2_block_t *blocks)
+{ // Fills blocks[(blocknum * parallelism) + lane] with 128 u64 words derived from `in`, `lane` and `blocknum` using BLAKE2b.
+  blake2b_ctx_t ctx;
+
+  blake2b_init (&ctx);
+
+  u64 blake_buf[16] = { 0 };
+  // Hashed input, in order: sizeof(argon2_block_t) (length argument 4), `in` (length argument 64), hl32_to_64 (lane, blocknum) (length argument 8).
+  blake_buf[0] = sizeof(argon2_block_t);
+
+  blake2b_update (&ctx, (u32 *) blake_buf, 4);
+  blake2b_update (&ctx, in, 64);
+
+  blake_buf[0] = hl32_to_64 (lane, blocknum);
+
+  blake2b_update (&ctx, (u32 *) blake_buf, 8);
+
+  blake2b_final (&ctx);
+
+  GLOBAL_AS u64 *out = blocks[(blocknum * parallelism) + lane].values;
+  // The first four state words of the initial digest open the block.
+  out[0] = ctx.h[0];
+  out[1] = ctx.h[1];
+  out[2] = ctx.h[2];
+  out[3] = ctx.h[3];
+  // Chain: feed the previous 8 state words back in and emit only the first four words of each new digest.
+  for (u32 off = 4; off < 124; off += 4)
+  {
+    for (u32 idx = 0; idx < 8; idx++) blake_buf[idx] = ctx.h[idx];
+
+    blake2b_init (&ctx);
+    blake2b_transform (ctx.h, blake_buf, 64, BLAKE2B_FINAL);
+
+    out[off + 0] = ctx.h[0];
+    out[off + 1] = ctx.h[1];
+    out[off + 2] = ctx.h[2];
+    out[off + 3] = ctx.h[3];
+  }
+  // State words 4..7 of the last chained digest fill the final four slots of the block.
+  out[124] = ctx.h[4];
+  out[125] = ctx.h[5];
+  out[126] = ctx.h[6];
+  out[127] = ctx.h[7];
+}
+
+DECLSPEC void argon2_initial_hash (GLOBAL_AS const pw_t *pw, GLOBAL_AS const salt_t *salt, const argon2_options_t *options, u64 *blockhash)
+{ // Hashes the option fields, password and salt with BLAKE2b and stores the 8 resulting u64 state words in blockhash[0..7].
+  blake2b_ctx_t ctx;
+  blake2b_init (&ctx);
+
+  u32 option_input[32] = { 0 };
+  // Six u32 option fields are hashed first, in exactly this order (length argument 24).
+  option_input[0] = options->parallelism;
+  option_input[1] = options->digest_len;
+  option_input[2] = options->memory_usage_in_kib;
+  option_input[3] = options->iterations;
+  option_input[4] = options->version;
+  option_input[5] = options->type;
+
+  blake2b_update (&ctx, option_input, 24);
+
+  u32 len_input[32] = { 0 };
+  // Password and salt are each preceded by their u32 length.
+  len_input[0] = pw->pw_len;
+
+  blake2b_update (&ctx, len_input, 4);
+  blake2b_update_global (&ctx, pw->i, pw->pw_len);
+
+  len_input[0] = salt->salt_len;
+
+  blake2b_update (&ctx, len_input, 4);
+  blake2b_update_global (&ctx, salt->salt_buf, salt->salt_len);
+  // No secret or associated data is hashed here: only a zero length is fed in for each of them.
+  len_input[0] = 0;
+
+  blake2b_update (&ctx, len_input, 4); // secret (K)
+  blake2b_update (&ctx, len_input, 4); // associated data (X)
+
+  blake2b_final (&ctx);
+
+  for (u32 idx = 0; idx < 8; idx++) blockhash[idx] = ctx.h[idx];
+}
+
+DECLSPEC void argon2_init (GLOBAL_AS const pw_t *pw, GLOBAL_AS const salt_t *salt,
+                           const argon2_options_t *options, GLOBAL_AS argon2_block_t *out)
+{ // Computes the initial hash for (pw, salt, options) and writes blocks 0 and 1 of every lane into `out`.
+  u64 blockhash[16] = { 0 };
+  // argon2_initial_hash writes blockhash[0..7] only; the remaining words keep their zero initialisation.
+  argon2_initial_hash (pw, salt, options, blockhash);
+
+  // Generate the first two blocks of each lane
+  for (u32 lane = 0; lane < options->parallelism; lane++)
+  {
+    argon2_initial_block ((u32 *) blockhash, lane, 0, options->parallelism, out);
+    argon2_initial_block ((u32 *) blockhash, lane, 1, options->parallelism, out);
+  }
+}
+
+// TODO: reconsider 'trunc_mul()'
+DECLSPEC u64 trunc_mul (u64 x, u64 y)
+{ // Multiplies only the low 32 bits of x and y; the upper halves of both arguments are discarded by the casts.
+  const u32 xlo = (u32) x;
+  const u32 ylo = (u32) y;
+  return hl32_to_64_S (hc_umulhi (xlo, ylo), (u32) (xlo * ylo)); // presumably (high, low) halves of the 64-bit product xlo * ylo - confirm helper semantics
+}
+
+DECLSPEC inline u32 argon2_ref_address (const argon2_options_t *options, const argon2_pos_t *pos, u32 index, u64 pseudo_random)
+{ // Returns (options->parallelism * ref_index) + ref_lane, the reference block selected by pseudo_random for position `index` within the segment at `pos`.
+  u32 ref_lane;
+  u32 ref_area;
+  u32 ref_index;
+  // Pass 0 / slice 0 always references the own lane; otherwise h32_from_64_S (pseudo_random) modulo the lane count picks it.
+  if ((pos->pass == 0) && (pos->slice == 0))
+  {
+    ref_lane = pos->lane;
+  }
+  else
+  {
+    ref_lane = h32_from_64_S (pseudo_random) % options->parallelism;
+  }
+  // Base area in blocks: `slice` segments during pass 0, ARGON2_SYNC_POINTS - 1 segments in later passes.
+  ref_area  = (pos->pass == 0) ? pos->slice : (ARGON2_SYNC_POINTS - 1);
+  ref_area *= options->segment_length;
+  // For index == 0 the unsigned (index - 1) wraps, so the addition shrinks the area by one block.
+  if ((ref_lane == pos->lane) || (index == 0))
+  {
+      ref_area += (index - 1);
+  }
+  // ref_index = ref_area - 1 - hi32 (ref_area * hi32 (j1 * j1)), assuming hc_umulhi yields the upper 32 bits of the product.
+  const u32 j1 = l32_from_64_S (pseudo_random);
+  ref_index = (ref_area - 1 - hc_umulhi (ref_area, hc_umulhi (j1, j1)));
+  // Later passes offset the index by the start of the slice after the current one and wrap around the lane.
+  if (pos->pass > 0)
+  {
+    ref_index += (pos->slice + 1) * options->segment_length;
+
+    if (ref_index >= options->lane_length)
+    {
+      ref_index -= options->lane_length;
+    }
+  }
+
+  return (options->parallelism * ref_index) + ref_lane;
+}
+
+DECLSPEC void swap_u64 (u64 *x, u64 *y)
+{ // Exchanges the two u64 values that x and y point to.
+  u64 tmp = *x;
+  *x = *y;
+  *y = tmp;
+}
+
+DECLSPEC void transpose_permute_block (u64 R[4], int thread)
+{ // Reorders R[0..3] in place depending on bits 3 (0x08) and 2 (0x04) of `thread`.
+  if (thread & 0x08) // bit 3 set: exchange the halves (R[0], R[1]) and (R[2], R[3])
+  {
+    swap_u64 (&R[0], &R[2]);
+    swap_u64 (&R[1], &R[3]);
+  }
+  if (thread & 0x04) // bit 2 set: exchange the elements inside each half
+  {
+    swap_u64 (&R[0], &R[1]);
+    swap_u64 (&R[2], &R[3]);
+  }
+}
+
+DECLSPEC int argon2_shift (int idx, int thread)
+{ // Treats bits 4 and 0 of `thread` as a 2-bit number, adds the low two bits of idx to it modulo 4, and keeps bits 1..3 of `thread` unchanged.
+  const int delta = ((idx & 0x02) << 3) + (idx & 0x01); // bit 1 of idx moves to bit 4, bit 0 stays at bit 0
+  return (thread & 0x0e) | (((thread & 0x11) + delta + 0x0e) & 0x11); // adding 0x0e lets a carry out of bit 0 ripple through bits 1..3 into bit 4
+}
+
+DECLSPEC void argon2_hash_block (u64 R[4], int thread, LOCAL_AS u64 *shuffle_buf)
+{ // Transforms R[0..3] in place: four ARGON2_G applications interleaved with hc__shfl_sync exchanges (presumably between the threads sharing a block) and two local reorderings. shuffle_buf is only forwarded to hc__shfl_sync.
+  for (u32 idx = 1; idx < 4; idx++) R[idx] = hc__shfl_sync (shuffle_buf, FULL_MASK, R[idx], thread ^ (idx << 2)); // R[0] is never shuffled; only R[1..3] change source index
+
+  transpose_permute_block (R, thread);
+
+  for (u32 idx = 1; idx < 4; idx++) R[idx] = hc__shfl_sync (shuffle_buf, FULL_MASK, R[idx], thread ^ (idx << 2));
+
+  ARGON2_G(R[0], R[1], R[2], R[3]);
+
+  for (u32 idx = 1; idx < 4; idx++) R[idx] = hc__shfl_sync (shuffle_buf, FULL_MASK, R[idx],  (thread & 0x1c) | ((thread + idx) & 0x03));
+
+  ARGON2_G(R[0], R[1], R[2], R[3]);
+
+  for (u32 idx = 1; idx < 4; idx++) R[idx] = hc__shfl_sync (shuffle_buf, FULL_MASK, R[idx], ((thread & 0x1c) | ((thread - idx) & 0x03)) ^ (idx << 2));
+
+  transpose_permute_block (R, thread);
+
+  for (u32 idx = 1; idx < 4; idx++) R[idx] = hc__shfl_sync (shuffle_buf, FULL_MASK, R[idx], thread ^ (idx << 2));
+
+  ARGON2_G(R[0], R[1], R[2], R[3]);
+
+  for (u32 idx = 1; idx < 4; idx++) R[idx] = hc__shfl_sync (shuffle_buf, FULL_MASK, R[idx], argon2_shift (idx, thread));
+
+  ARGON2_G(R[0], R[1], R[2], R[3]);
+
+  for (u32 idx = 1; idx < 4; idx++) R[idx] = hc__shfl_sync (shuffle_buf, FULL_MASK, R[idx], argon2_shift ((4 - idx), thread)); // 4 - idx is the modulo-4 inverse of idx in argon2_shift
+}
+
+DECLSPEC void argon2_next_addresses (const argon2_options_t *options, const argon2_pos_t *pos, u32 *addresses, u32 start_index, u32 thread, LOCAL_AS u64 *shuffle_buf)
+{ // Writes four reference addresses for this thread into addresses[0..3], covering indices start_index + thread + i * THREADS_PER_LANE for i = 0..3.
+  u64 Z[4] = { 0 };
+  u64 tmp[4];
+  // Threads 0..6 each contribute one input word in Z[0]; every other word of the input is zero.
+  switch (thread)
+  {
+    case 0:  Z[0] = pos->pass;                   break;
+    case 1:  Z[0] = pos->lane;                   break;
+    case 2:  Z[0] = pos->slice;                  break;
+    case 3:  Z[0] = options->memory_block_count; break;
+    case 4:  Z[0] = options->iterations;         break;
+    case 5:  Z[0] = options->type;               break;
+    case 6:  Z[0] = (start_index / 128) + 1;     break;
+    default: Z[0] = 0;                           break;
+  }
+  // First round: Z[1..3] were zero before hashing, so the feed-forward XOR only needs to touch Z[0].
+  tmp[0] = Z[0];
+
+  argon2_hash_block (Z, thread, shuffle_buf);
+
+  Z[0]  ^= tmp[0];
+  // Second round: hash again and XOR the round input back into all four words.
+  for (u32 idx = 0; idx < 4; idx++) tmp[idx] = Z[idx];
+
+  argon2_hash_block (Z, thread, shuffle_buf);
+
+  for (u32 idx = 0; idx < 4; idx++) Z[idx]  ^= tmp[idx];
+  // Each resulting word is the pseudo-random input for one block index.
+  for (u32 i = 0, index = (start_index + thread); i < 4; i++, index += THREADS_PER_LANE)
+  {
+    addresses[i] = argon2_ref_address (options, pos, index, Z[i]);
+  }
+}
+
+DECLSPEC u32 index_u32x4 (const u32 array[4], u32 index)
+{ // Returns array[index] for index 0..3, selected through a switch on constant subscripts rather than a variable subscript.
+  switch (index)
+  {
+    case 0:
+      return array[0];
+    case 1:
+      return array[1];
+    case 2:
+      return array[2];
+    case 3:
+      return array[3];
+  }
+  // Any other index falls through to here; -1 converts to the largest u32 value.
+  return -1;
+}
+
+DECLSPEC GLOBAL_AS argon2_block_t *argon2_get_current_block (GLOBAL_AS argon2_block_t *blocks, const argon2_options_t *options, u32 lane, u32 index_in_lane, u64 R[4], u32 thread)
+{ // Loads this thread's four words of the previous block of `lane` into R and returns a pointer to the block at index_in_lane.
+  // Apply wrap-around to previous block index if the current block is the first block in the lane
+  const u32 prev_in_lane = (index_in_lane == 0) ? (options->lane_length - 1) : (index_in_lane - 1);
+  // Blocks are interleaved by lane: the block at position p of lane l lives at blocks[(p * parallelism) + l].
+  GLOBAL_AS argon2_block_t *prev_block = &blocks[(prev_in_lane * options->parallelism) + lane];
+  // A thread owns the words at thread + idx * THREADS_PER_LANE for idx = 0..3.
+  for (u32 idx = 0; idx < 4; idx++) R[idx] = prev_block->values[(idx * THREADS_PER_LANE) + thread];
+
+  return &blocks[(index_in_lane * options->parallelism) + lane];
+}
+
+DECLSPEC void argon2_fill_subsegment (GLOBAL_AS argon2_block_t *blocks, const argon2_options_t *options, const argon2_pos_t *pos, bool indep_addr, const u32 addresses[4],
+                                      u32 start_index, u32 end_index, GLOBAL_AS argon2_block_t *cur_block, u64 R[4], u32 thread, LOCAL_AS u64 *shuffle_buf)
+{ // Computes the blocks for index start_index .. end_index - 1, advancing cur_block by options->parallelism blocks per step. R holds this thread's four words of the previous block on entry and of the last written block on exit.
+  for (u32 index = start_index; index < end_index; index++, cur_block += options->parallelism)
+  {
+    u32 ref_address;
+    // indep_addr selects a precomputed entry of `addresses`; otherwise the address is derived from R[0]. Either way the value then goes through hc__shfl_sync (source index `index` or 0).
+    if (indep_addr)
+    {
+      ref_address = index_u32x4 (addresses, (index / THREADS_PER_LANE) % ARGON2_SYNC_POINTS);
+      ref_address = hc__shfl_sync (shuffle_buf, FULL_MASK, ref_address, index);
+    }
+    else
+    {
+      ref_address = argon2_ref_address (options, pos, index, R[0]);
+      ref_address = hc__shfl_sync (shuffle_buf, FULL_MASK, ref_address, 0);
+    }
+    // Each thread handles the words at thread + idx * THREADS_PER_LANE (idx = 0..3) of every block it touches.
+    GLOBAL_AS const argon2_block_t *ref_block = &blocks[ref_address];
+
+    u64 tmp[4] = { 0 };
+
+    // First pass is overwrite, next passes are XOR with previous
+    if ((pos->pass > 0) && (options->version != ARGON2_VERSION_10))
+    {
+      for (u32 idx = 0; idx < 4; idx++) tmp[idx]  = cur_block->values[(idx * THREADS_PER_LANE) + thread];
+    }
+    // R becomes previous ^ reference; tmp keeps that value (combined with the old block content when loaded above) for the feed-forward after hashing.
+    for (u32 idx = 0; idx < 4; idx++) R[idx]   ^= ref_block->values[(idx * THREADS_PER_LANE) + thread];
+
+    for (u32 idx = 0; idx < 4; idx++) tmp[idx] ^= R[idx];
+
+    argon2_hash_block (R, thread, shuffle_buf);
+
+    for (u32 idx = 0; idx < 4; idx++) R[idx]   ^= tmp[idx];
+
+    for (u32 idx = 0; idx < 4; idx++) cur_block->values[(idx * THREADS_PER_LANE) + thread] = R[idx];
+  }
+}
+
+DECLSPEC void argon2_fill_segment (GLOBAL_AS argon2_block_t *blocks, const argon2_options_t *options, const argon2_pos_t *pos, LOCAL_AS u64 *shuffle_buf)
+{ // Fills the segment identified by pos->pass, pos->slice and pos->lane; the calling thread's index comes from get_local_id(0).
+  const u32  thread       = get_local_id(0);
+
+  // We have already generated the first two blocks of each lane (for the first pass)
+  const u32 skip_blocks   = (pos->pass == 0) && (pos->slice == 0) ? 2 : 0;
+  const u32 index_in_lane = (pos->slice * options->segment_length) + skip_blocks;
+
+  u64 R[4];
+
+  GLOBAL_AS argon2_block_t *cur_block = argon2_get_current_block (blocks, options, pos->lane, index_in_lane, R, thread);
+  // Precomputed addresses are used for TYPE_I, and for TYPE_ID during slices 0 and 1 of pass 0; everything else derives the address from R[0].
+  if ((options->type == TYPE_I) || ((options->type == TYPE_ID) && (pos->pass == 0) && (pos->slice <= 1)))
+  {
+    for (u32 block_index = 0; block_index < options->segment_length; block_index += 128)
+    {
+      const u32 start_index = (block_index == 0) ? skip_blocks : block_index;
+      const u32 end_index   = MIN(((start_index | 127) + 1), options->segment_length);
+      // One argon2_next_addresses call serves a chunk of up to 128 block indices.
+      u32 addresses[4];
+
+      argon2_next_addresses (options, pos, addresses, block_index, thread, shuffle_buf);
+      argon2_fill_subsegment (blocks, options, pos, true, addresses, start_index, end_index, cur_block, R, thread, shuffle_buf);
+      // argon2_fill_subsegment advances only its own copy of cur_block, so step the pointer here as well.
+      cur_block += (end_index - start_index) * options->parallelism;
+    }
+  }
+  else
+  {
+    u32 addresses[4] = { 0 };
+    // `addresses` is not read when indep_addr is false; the zeroed array only satisfies the signature.
+    argon2_fill_subsegment (blocks, options, pos, false, addresses, skip_blocks, options->segment_length, cur_block, R, thread, shuffle_buf);
+  }
+}
+
+DECLSPEC void argon2_final (GLOBAL_AS argon2_block_t *blocks, const argon2_options_t *options, u32 *out)
+{ // XORs the last block of every lane together, hashes digest_len followed by that block with BLAKE2b, and writes digest_len / 4 u32 words to `out`.
+  const u32 lane_length = options->lane_length;
+  const u32 lanes = options->parallelism;
+  // Explicit { 0 } (not the empty-brace extension) matches every other zero-initialiser in this file.
+  argon2_block_t final_block = { 0 };
+
+  for (u32 l = 0; l < lanes; l++)
+  {
+    for (u32 idx = 0; idx < 128; idx++) final_block.values[idx] ^= blocks[((lane_length - 1) * lanes) + l].values[idx];
+  }
+
+  u32 output_len [32] = {0};
+  output_len [0] = options->digest_len;
+
+  blake2b_ctx_t ctx;
+  blake2b_init (&ctx);
+
+  // Override default (0x40) value in BLAKE2b
+  ctx.h[0] ^= 0x40 ^ options->digest_len;
+
+  blake2b_update (&ctx, output_len, 4);
+  blake2b_update (&ctx, (u32 *) final_block.values, sizeof(final_block));
+
+  blake2b_final (&ctx);
+  // NOTE(review): words are written in pairs, one ctx.h entry per pair - assumes digest_len / 4 is even and ctx.h has at least digest_len / 8 entries; confirm with callers.
+  for (int i = 0, idx = 0; i < (options->digest_len / 4); i += 2, idx += 1)
+  {
+    out [i + 0] = l32_from_64_S (ctx.h[idx]);
+    out [i + 1] = h32_from_64_S (ctx.h[idx]);
+  }
+}
diff --git a/OpenCL/inc_hash_argon2.h b/OpenCL/inc_hash_argon2.h
index 8980068cd..922afc44a 100644
--- a/OpenCL/inc_hash_argon2.h
+++ b/OpenCL/inc_hash_argon2.h
@@ -1,84 +1,147 @@
-/**
- * Author......: Netherlands Forensic Institute
- * License.....: MIT
- */
-
-#ifndef INC_HASH_ARGON2_H
-#define INC_HASH_ARGON2_H
-
-#define MIN(a,b) (((a) < (b)) ? (a) : (b))
-
-#define ARGON2_VERSION_10 0x10
-#define ARGON2_VERSION_13 0x13
-
-#define THREADS_PER_LANE 32
-#define FULL_MASK 0xffffffff
-
-#define BLAKE2B_OUTBYTES 64
-#define ARGON2_SYNC_POINTS 4
-#define ARGON2_ADDRESSES_IN_BLOCK 128
-
-#define TYPE_D  0
-#define TYPE_I  1
-#define TYPE_ID 2
-
-#define ARGON2_G(a,b,c,d)                \
-{                                        \
-  a = a + b + 2 * trunc_mul(a, b);       \
-  d = blake2b_rot32_S (d ^ a);           \
-  c = c + d + 2 * trunc_mul(c, d);       \
-  b = blake2b_rot24_S (b ^ c);           \
-  a = a + b + 2 * trunc_mul(a, b);       \
-  d = blake2b_rot16_S (d ^ a);           \
-  c = c + d + 2 * trunc_mul(c, d);       \
-  b = hc_rotr64_S (b ^ c, 63);           \
-}
-
-#define ARGON2_P()                       \
-{                                        \
-  ARGON2_G(v[0], v[4], v[8], v[12]);     \
-  ARGON2_G(v[1], v[5], v[9], v[13]);     \
-  ARGON2_G(v[2], v[6], v[10], v[14]);    \
-  ARGON2_G(v[3], v[7], v[11], v[15]);    \
-                                         \
-  ARGON2_G(v[0], v[5], v[10], v[15]);    \
-  ARGON2_G(v[1], v[6], v[11], v[12]);    \
-  ARGON2_G(v[2], v[7], v[8], v[13]);     \
-  ARGON2_G(v[3], v[4], v[9], v[14]);     \
-}
-
-typedef struct argon2_block
-{
-  u64 values[128];
-
-} argon2_block_t;
-
-typedef struct argon2_options
-{
-  u32 type;
-  u32 version;
-
-  u32 iterations;
-  u32 parallelism;
-  u32 memory_usage_in_kib;
-
-  u32 segment_length;
-  u32 lane_length;
-  u32 memory_block_count;
-  u32 digest_len;
-
-} argon2_options_t;
-
-typedef struct argon2_pos
-{
-  u32 pass;
-  u32 slice;
-  u32 lane;
-
-} argon2_pos_t;
-
-DECLSPEC void argon2_init (GLOBAL_AS const pw_t *pw, GLOBAL_AS const salt_t *salt, const argon2_options_t *options, GLOBAL_AS argon2_block_t *out);
-DECLSPEC void argon2_fill_segment (GLOBAL_AS argon2_block_t *blocks, const argon2_options_t *options, const argon2_pos_t *pos);
-DECLSPEC void argon2_final (GLOBAL_AS argon2_block_t *blocks, const argon2_options_t *options, u32 *out);
-
-#endif // INC_HASH_ARGON2_H
+/**
+ * Author......: Netherlands Forensic Institute
+ * License.....: MIT
+ */
+
+#ifndef INC_HASH_ARGON2_H
+#define INC_HASH_ARGON2_H
+
+#define MIN(a,b) (((a) < (b)) ? (a) : (b))
+
+#define ARGON2_VERSION_10 0x10
+#define ARGON2_VERSION_13 0x13
+
+#define THREADS_PER_LANE 32
+#define FULL_MASK 0xffffffff
+
+#define BLAKE2B_OUTBYTES 64
+#define ARGON2_SYNC_POINTS 4
+#define ARGON2_ADDRESSES_IN_BLOCK 128
+
+#define TYPE_D  0
+#define TYPE_I  1
+#define TYPE_ID 2
+
+#if defined IS_CUDA
+#define hc__shfl_sync(shfbuf,mask,var,srcLane) __shfl_sync ((mask),(var),(srcLane))
+#elif defined IS_HIP
+// NOTE: the shuffle width is hard-coded to 32 lanes for HIP here
+#define hc__shfl_sync(shfbuf,mask,var,srcLane) __shfl ((var),(srcLane),32)
+#elif defined IS_OPENCL
+#define hc__shfl_sync(shfbuf,mask,var,srcLane) hc__shfl ((shfbuf),(var),(srcLane))
+
+#if defined IS_AMD && defined IS_GPU
+DECLSPEC u64 hc__shfl (MAYBE_UNUSED LOCAL_AS u64 *shuffle_buf, const u64 var, const int src_lane)
+{
+  const u32 idx = src_lane << 2;
+  // split the 64-bit value: the permute builtin below is called once per 32-bit half
+  const u32 l32 = l32_from_64_S (var);
+  const u32 h32 = h32_from_64_S (var);
+  // fetch both halves with the same index (src_lane * 4); shuffle_buf is not used on this path
+  const u32 l32r = __builtin_amdgcn_ds_bpermute (idx, l32);
+  const u32 h32r = __builtin_amdgcn_ds_bpermute (idx, h32);
+  // reassemble the two fetched halves into the 64-bit result
+  const u64 out = hl32_to_64_S (h32r, l32r);
+
+  return out;
+}
+#elif defined IS_NV && defined IS_GPU
+DECLSPEC u64 hc__shfl (MAYBE_UNUSED LOCAL_AS u64 *shuffle_buf, const u64 var, const int src_lane)
+{
+  const u32 l32 = l32_from_64_S (var);
+  const u32 h32 = h32_from_64_S (var);
+  // receivers for the two shuffled 32-bit halves; shuffle_buf is not used on this path
+  u32 l32r;
+  u32 h32r;
+  // low half via inline PTX. NOTE(review): the trailing 0 looks like the member-mask operand of shfl.sync - confirm
+  asm("shfl.sync.idx.b32 %0, %1, %2, 0x1f, 0;"
+      : "=r"(l32r)
+      : "r"(l32), "r"(src_lane));
+  // high half: same instruction and the same src_lane operand
+  asm("shfl.sync.idx.b32 %0, %1, %2, 0x1f, 0;"
+      : "=r"(h32r)
+      : "r"(h32), "r"(src_lane));
+  // reassemble the two halves into the 64-bit result
+  const u64 out = hl32_to_64_S (h32r, l32r);
+
+  return out;
+}
+#else
+DECLSPEC u64 hc__shfl (MAYBE_UNUSED LOCAL_AS u64 *shuffle_buf, const u64 var, const int src_lane)
+{
+  const u32 lid = get_local_id (0);
+  // generic fallback: publish this work-item's value in its own slot of the local scratch buffer
+  shuffle_buf[lid] = var;
+  // wait until every slot has been written before any work-item reads one
+  barrier (CLK_LOCAL_MEM_FENCE);
+  const u64 out = shuffle_buf[src_lane & 31];
+  // wait until every read is done, otherwise the next call's write could overwrite a slot that is still being read
+  barrier (CLK_LOCAL_MEM_FENCE);
+  return out;
+}
+#endif
+
+#elif defined IS_METAL
+//todo
+#endif
+
+#define ARGON2_G(a,b,c,d)                \
+{                                        \
+  a = a + b + 2 * trunc_mul(a, b);       \
+  d = blake2b_rot32_S (d ^ a);           \
+  c = c + d + 2 * trunc_mul(c, d);       \
+  b = blake2b_rot24_S (b ^ c);           \
+  a = a + b + 2 * trunc_mul(a, b);       \
+  d = blake2b_rot16_S (d ^ a);           \
+  c = c + d + 2 * trunc_mul(c, d);       \
+  b = hc_rotr64_S (b ^ c, 63);           \
+}
+
+#define ARGON2_P()                       \
+{                                        \
+  ARGON2_G(v[0], v[4], v[8], v[12]);     \
+  ARGON2_G(v[1], v[5], v[9], v[13]);     \
+  ARGON2_G(v[2], v[6], v[10], v[14]);    \
+  ARGON2_G(v[3], v[7], v[11], v[15]);    \
+                                         \
+  ARGON2_G(v[0], v[5], v[10], v[15]);    \
+  ARGON2_G(v[1], v[6], v[11], v[12]);    \
+  ARGON2_G(v[2], v[7], v[8], v[13]);     \
+  ARGON2_G(v[3], v[4], v[9], v[14]);     \
+}
+
+typedef struct argon2_block
+{
+  u64 values[128];
+
+} argon2_block_t;
+
+typedef struct argon2_options
+{
+  u32 type;
+  u32 version;
+
+  u32 iterations;
+  u32 parallelism;
+  u32 memory_usage_in_kib;
+
+  u32 segment_length;
+  u32 lane_length;
+  u32 memory_block_count;
+  u32 digest_len;
+
+} argon2_options_t;
+
+typedef struct argon2_pos
+{
+  u32 pass;
+  u32 slice;
+  u32 lane;
+
+} argon2_pos_t;
+
+DECLSPEC void argon2_init (GLOBAL_AS const pw_t *pw, GLOBAL_AS const salt_t *salt, const argon2_options_t *options, GLOBAL_AS argon2_block_t *out);
+DECLSPEC void argon2_fill_segment (GLOBAL_AS argon2_block_t *blocks, const argon2_options_t *options, const argon2_pos_t *pos, LOCAL_AS u64 *shuffle_buf);
+DECLSPEC void argon2_final (GLOBAL_AS argon2_block_t *blocks, const argon2_options_t *options, u32 *out);
+
+#endif // INC_HASH_ARGON2_H
diff --git a/OpenCL/inc_platform.cl b/OpenCL/inc_platform.cl
index 36303296f..0a386f22e 100644
--- a/OpenCL/inc_platform.cl
+++ b/OpenCL/inc_platform.cl
@@ -114,7 +114,9 @@ DECLSPEC size_t get_group_id (const u32 dimindx)
       return blockIdx.y;
     case 2:
       return blockIdx.z;
-  }  
+  }
+
+  return (size_t) -1;
 }
 
 DECLSPEC size_t get_global_id  (const u32 dimindx __attribute__((unused)))
@@ -133,6 +135,8 @@ DECLSPEC size_t get_local_id (const u32 dimindx)
     case 2:
       return threadIdx.z;
   }
+
+  return (size_t) -1;
 }
 
 DECLSPEC size_t get_local_size (const u32 dimindx)
@@ -145,7 +149,9 @@ DECLSPEC size_t get_local_size (const u32 dimindx)
       return blockDim.y;
     case 2:
       return blockDim.z;
-  }  
+  }
+
+  return (size_t) -1;
 }
 
 DECLSPEC u32x rotl32 (const u32x a, const int n)
@@ -328,9 +334,19 @@ DECLSPEC u32 hc_atomic_or (GLOBAL_AS u32 *p, volatile const u32 val)
   return atomicOr (p, val);
 }
 
-DECLSPEC size_t get_group_id  (const u32 dimindx __attribute__((unused)))
+DECLSPEC size_t get_group_id (const u32 dimindx)
 {
-  return blockIdx.x;
+  switch (dimindx)
+  {
+    case 0:
+      return blockIdx.x;
+    case 1:
+      return blockIdx.y;
+    case 2:
+      return blockIdx.z;
+  }
+
+  return (size_t) -1;
 }
 
 DECLSPEC size_t get_global_id  (const u32 dimindx __attribute__((unused)))
@@ -338,15 +354,34 @@ DECLSPEC size_t get_global_id  (const u32 dimindx __attribute__((unused)))
   return (blockIdx.x * blockDim.x) + threadIdx.x;
 }
 
-DECLSPEC size_t get_local_id (const u32 dimindx __attribute__((unused)))
+DECLSPEC size_t get_local_id (const u32 dimindx)
 {
-  return threadIdx.x;
+  switch (dimindx)
+  {
+    case 0:
+      return threadIdx.x;
+    case 1:
+      return threadIdx.y;
+    case 2:
+      return threadIdx.z;
+  }
+
+  return (size_t) -1;
 }
 
-DECLSPEC size_t get_local_size (const u32 dimindx __attribute__((unused)))
+DECLSPEC size_t get_local_size (const u32 dimindx)
 {
-  // verify
-  return blockDim.x;
+  switch (dimindx)
+  {
+    case 0:
+      return blockDim.x;
+    case 1:
+      return blockDim.y;
+    case 2:
+      return blockDim.z;
+  }
+
+  return (size_t) -1;
 }
 
 DECLSPEC u32x rotl32 (const u32x a, const int n)
diff --git a/OpenCL/inc_platform.h b/OpenCL/inc_platform.h
index 4e203ff18..e1ffdefcf 100644
--- a/OpenCL/inc_platform.h
+++ b/OpenCL/inc_platform.h
@@ -49,7 +49,8 @@ DECLSPEC u32 hc_atomic_dec (volatile GLOBAL_AS u32 *p);
 DECLSPEC u32 hc_atomic_inc (volatile GLOBAL_AS u32 *p);
 DECLSPEC u32 hc_atomic_or  (volatile GLOBAL_AS u32 *p, volatile const u32 val);
 
-DECLSPEC size_t get_global_id   (const u32 dimindx);
+DECLSPEC size_t get_global_id   (const u32 dimindx __attribute__((unused)));
+DECLSPEC size_t get_group_id    (const u32 dimindx);
 DECLSPEC size_t get_local_id    (const u32 dimindx);
 DECLSPEC size_t get_local_size  (const u32 dimindx);
 
diff --git a/OpenCL/m34000-pure.cl b/OpenCL/m34000-pure.cl
index 8c17a523d..483adf59c 100644
--- a/OpenCL/m34000-pure.cl
+++ b/OpenCL/m34000-pure.cl
@@ -14,6 +14,12 @@
 #define COMPARE_M M2S(INCLUDE_PATH/inc_comp_multi.cl)
 
 typedef struct argon2_tmp
+{
+  u32 state[4]; // just something for now
+
+} argon2_tmp_t;
+
+typedef struct argon2_extra
 {
 #ifndef ARGON2_TMP_ELEM
 #define ARGON2_TMP_ELEM 1
@@ -21,26 +27,59 @@ typedef struct argon2_tmp
 
   argon2_block_t blocks[ARGON2_TMP_ELEM];
 
-} argon2_tmp_t;
+} argon2_extra_t;
 
-KERNEL_FQ void m34000_init (_KERN_ATTR_TMPS_ESALT (argon2_tmp_t, argon2_options_t))
+KERNEL_FQ KERNEL_FA void m34000_init (KERN_ATTR_TMPS_ESALT (argon2_tmp_t, argon2_options_t))
 {
   const u64 gid = get_global_id (0);
 
   if (gid >= GID_CNT) return;
 
+  const u32 gd4 = gid / 4;
+  const u32 gm4 = gid % 4;
+
+  GLOBAL_AS argon2_extra_t *V;
+
+  switch (gm4)
+  {
+    case 0: V = (GLOBAL_AS argon2_extra_t *) d_extra0_buf; break;
+    case 1: V = (GLOBAL_AS argon2_extra_t *) d_extra1_buf; break;
+    case 2: V = (GLOBAL_AS argon2_extra_t *) d_extra2_buf; break;
+    case 3: V = (GLOBAL_AS argon2_extra_t *) d_extra3_buf; break;
+  }
+
+  GLOBAL_AS argon2_extra_t *argon2_extra = V + gd4;
+
   const argon2_options_t options = esalt_bufs[DIGESTS_OFFSET_HOST];
 
-  argon2_init (&pws[gid], &salt_bufs[SALT_POS_HOST], &options, tmps[gid].blocks);
+  argon2_init (&pws[gid], &salt_bufs[SALT_POS_HOST], &options, argon2_extra->blocks);
 }
 
-KERNEL_FQ void m34000_loop (_KERN_ATTR_TMPS_ESALT (argon2_tmp_t, argon2_options_t))
+KERNEL_FQ KERNEL_FA void m34000_loop (KERN_ATTR_TMPS_ESALT (argon2_tmp_t, argon2_options_t))
 {
-  const u64 gid = get_group_id (0);
+  const u64 bid = get_group_id (0);
   const u64 lid = get_local_id (1);
   const u64 lsz = get_local_size (1);
 
-  if (gid >= GID_CNT) return;
+  if (bid >= GID_CNT) return;
+
+  LOCAL_VK u64 shuffle_bufs[ARGON2_PARALLELISM][32];
+  LOCAL_AS u64 *shuffle_buf = shuffle_bufs[lid];
+
+  const u32 bd4 = bid / 4;
+  const u32 bm4 = bid % 4;
+
+  GLOBAL_AS argon2_extra_t *V;
+
+  switch (bm4)
+  {
+    case 0: V = (GLOBAL_AS argon2_extra_t *) d_extra0_buf; break;
+    case 1: V = (GLOBAL_AS argon2_extra_t *) d_extra1_buf; break;
+    case 2: V = (GLOBAL_AS argon2_extra_t *) d_extra2_buf; break;
+    case 3: V = (GLOBAL_AS argon2_extra_t *) d_extra3_buf; break;
+  }
+
+  GLOBAL_AS argon2_extra_t *argon2_extra = V + bd4;
 
   argon2_options_t options = esalt_bufs[DIGESTS_OFFSET_HOST];
 
@@ -55,7 +94,7 @@ KERNEL_FQ void m34000_loop (_KERN_ATTR_TMPS_ESALT (argon2_tmp_t, argon2_options_
   {
     for (pos.lane = lid; pos.lane < options.parallelism; pos.lane += lsz)
     {
-      argon2_fill_segment (tmps[gid].blocks, &options, &pos);
+      argon2_fill_segment (argon2_extra->blocks, &options, &pos, shuffle_buf);
     }
 
     SYNC_THREADS ();
@@ -70,17 +109,32 @@ KERNEL_FQ void m34000_loop (_KERN_ATTR_TMPS_ESALT (argon2_tmp_t, argon2_options_
   }
 }
 
-KERNEL_FQ void m34000_comp ( _KERN_ATTR_TMPS_ESALT (argon2_tmp_t, argon2_options_t))
+KERNEL_FQ KERNEL_FA void m34000_comp (KERN_ATTR_TMPS_ESALT (argon2_tmp_t, argon2_options_t))
 {
   const u64 gid = get_global_id (0);
 
   if (gid >= GID_CNT) return;
 
+  const u32 gd4 = gid / 4;
+  const u32 gm4 = gid % 4;
+
+  GLOBAL_AS argon2_extra_t *V;
+
+  switch (gm4)
+  {
+    case 0: V = (GLOBAL_AS argon2_extra_t *) d_extra0_buf; break;
+    case 1: V = (GLOBAL_AS argon2_extra_t *) d_extra1_buf; break;
+    case 2: V = (GLOBAL_AS argon2_extra_t *) d_extra2_buf; break;
+    case 3: V = (GLOBAL_AS argon2_extra_t *) d_extra3_buf; break;
+  }
+
+  GLOBAL_AS argon2_extra_t *argon2_extra = V + gd4;
+
   u32 out[8];
 
   const argon2_options_t options = esalt_bufs[DIGESTS_OFFSET_HOST];
 
-  argon2_final (tmps[gid].blocks, &options, out);
+  argon2_final (argon2_extra->blocks, &options, out);
 
   const u32 r0 = out[0];
   const u32 r1 = out[1];
diff --git a/src/autotune.c b/src/autotune.c
index 891baa472..a599e65be 100644
--- a/src/autotune.c
+++ b/src/autotune.c
@@ -107,7 +107,7 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
   const double target_msec = backend_ctx->target_msec;
 
-  const u32 kernel_accel_min =  (hashconfig->opts_type & OPTS_TYPE_MAXIMUM_ACCEL) ? device_param->kernel_accel_max : device_param->kernel_accel_min;
+  const u32 kernel_accel_min = device_param->kernel_accel_min;
   const u32 kernel_accel_max = device_param->kernel_accel_max;
 
   const u32 kernel_loops_min = device_param->kernel_loops_min;
diff --git a/src/backend.c b/src/backend.c
index 8c5dc47d5..c10cf769f 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -2662,16 +2662,7 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
 
     if (hc_cuEventRecord (hashcat_ctx, device_param->cuda_event1, device_param->cuda_stream) == -1) return -1;
 
-    if ((kern_run == KERN_RUN_2) && (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_DIMY_LOOP))
-    {
-      const u32 warp_size = device_param->kernel_preferred_wgs_multiple;
-
-      if (hc_cuLaunchKernel (hashcat_ctx, cuda_function, num, 1, 1, warp_size, blockDimY, 1, dynamic_shared_mem, device_param->cuda_stream, device_param->kernel_params, NULL) == -1) return -1;
-    }
-    else
-    {
-      if (hc_cuLaunchKernel (hashcat_ctx, cuda_function, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, dynamic_shared_mem, device_param->cuda_stream, device_param->kernel_params, NULL) == -1) return -1;
-    }
+    if (hc_cuLaunchKernel (hashcat_ctx, cuda_function, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, dynamic_shared_mem, device_param->cuda_stream, device_param->kernel_params, NULL) == -1) return -1;
 
     if (hc_cuEventRecord (hashcat_ctx, device_param->cuda_event2, device_param->cuda_stream) == -1) return -1;
 
@@ -16286,7 +16277,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     // Still not 100% sure about the 64MiB here
 
-    const u64 size_device_extra = MAX ((1024 * 1024 * 1024), size_device_extra1234);
+    const u64 size_device_extra = MAX ((64ULL * 1024 * 1024), size_device_extra1234);
 
     // we will first decrease accel and when reached that limit, we will decrease threads
     // when we decrease limit this will restore accel_max
diff --git a/src/modules/module_34000.c b/src/modules/module_34000.c
index 64056b36f..60a37358c 100644
--- a/src/modules/module_34000.c
+++ b/src/modules/module_34000.c
@@ -9,6 +9,7 @@
 #include "bitops.h"
 #include "convert.h"
 #include "shared.h"
+#include "memory.h"
 
 #define ARGON2_SYNC_POINTS  4
 #define ARGON2_BLOCK_SIZE   1024
@@ -18,17 +19,16 @@ static const u32   DGST_POS0      = 0;
 static const u32   DGST_POS1      = 1;
 static const u32   DGST_POS2      = 2;
 static const u32   DGST_POS3      = 3;
-static const u32   DGST_SIZE      = DGST_SIZE_4_8;
+static const u32   DGST_SIZE      = DGST_SIZE_8_16;
 static const u32   HASH_CATEGORY  = HASH_CATEGORY_GENERIC_KDF;
-static const char *HASH_NAME      = "Argon2ID";
+static const char *HASH_NAME      = "Argon2";
 static const u64   KERN_TYPE      = 34000;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
                                   | OPTI_TYPE_SLOW_HASH_DIMY_LOOP;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE
-                                  | OPTS_TYPE_NATIVE_THREADS
-                                  | OPTS_TYPE_MP_MULTI_DISABLE
-                                  | OPTS_TYPE_MAXIMUM_ACCEL;
+                                  | OPTS_TYPE_THREAD_MULTI_DISABLE
+                                  | OPTS_TYPE_MP_MULTI_DISABLE;
 static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
 static const char *ST_PASS        = "hashcat";
 static const char *ST_HASH        = "$argon2id$v=19$m=65536,t=3,p=1$FBMjI4RJBhIykCgol1KEJA$2ky5GAdhT1kH4kIgPN/oERE3Taiy43vNN70a3HpiKQU";
@@ -48,6 +48,12 @@ u32         module_salt_type      (MAYBE_UNUSED const hashconfig_t *hashconfig,
 const char *module_st_hash        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH;         }
 const char *module_st_pass        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS;         }
 
+typedef struct argon2_tmp
+{
+  u32 state[4]; // just something for now
+
+} argon2_tmp_t;
+
 typedef struct argon2_options
 {
   u32 type;
@@ -65,6 +71,8 @@ typedef struct argon2_options
 
 } argon2_options_t;
 
+static const char *SIGNATURE_ARGON2D  = "$argon2d$";
+static const char *SIGNATURE_ARGON2I  = "$argon2i$";
 static const char *SIGNATURE_ARGON2ID = "$argon2id$";
 
 u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
@@ -76,39 +84,18 @@ u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED
 
 u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
-  const u32 kernel_threads_min = 1;
+  const u32 kernel_threads_min = 32; // hard-coded in kernel
 
   return kernel_threads_min;
 }
 
 u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
-  const u32 kernel_threads_max = 1;
+  const u32 kernel_threads_max = 32; // hard-coded in kernel
 
   return kernel_threads_max;
 }
 
-u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_loops_min = 1;
-
-  return kernel_loops_min;
-}
-
-u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_loops_max = 1;
-
-  return kernel_loops_max;
-}
-
-bool module_warmup_disable (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const bool warmup_disable = true;
-
-  return warmup_disable;
-}
-
 u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u64 tmp_size = 0; // we'll add some later
@@ -116,28 +103,104 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel_user)
+{
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+  // Returns a heap-allocated one-line tuning-db entry for this device; the accel column is derived from device memory below.
+  argon2_options_t *options    = (argon2_options_t *) hashes->esalts_buf;
+  argon2_options_t *options_st = (argon2_options_t *) hashes->st_esalts_buf;
+  // fall back to the self-test esalt when the first user esalt carries no block count
+  const u32 memory_block_count = (options->memory_block_count) ? options->memory_block_count : options_st->memory_block_count;
+  // widen before multiplying: the 32-bit product would wrap once memory_block_count reaches 4Mi blocks (4 GiB)
+  const u64 size_per_accel = (u64) ARGON2_BLOCK_SIZE * memory_block_count;
+
+  int   lines_sz  = 4096;
+  char *lines_buf = hcmalloc (lines_sz);
+  int   lines_pos = 0;
+
+  const u32 device_processors = device_param->device_processors;
+
+  const u32 device_maxworkgroup_size = device_param->device_maxworkgroup_size;
+
+  const u64 fixed_mem = (256 * 1024 * 1024); // some storage we need for pws[], tmps[], and others. Is around 72MiB in reality.
+
+  const u64 spill_mem = (u64) 2048 * device_processors * device_maxworkgroup_size; // 1600 according to ptxas
+  // clamp at zero: subtracting the reserve from a smaller budget would wrap the unsigned value to a huge number
+  const u64 usable_mem = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4));
+  const u64 available_mem = (usable_mem > (fixed_mem + spill_mem)) ? (usable_mem - (fixed_mem + spill_mem)) : 0;
+  u32 kernel_accel_new = device_processors;
+
+  if (kernel_accel_user)
+  {
+    kernel_accel_new = kernel_accel_user;
+  }
+  else
+  {
+    if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->device_host_unified_memory == false))
+    {
+      u64 accel_fit = (size_per_accel) ? (available_mem / size_per_accel) : 0; // guard against division by zero
+      if (accel_fit < 1) accel_fit = 1; // always request at least one instance
+      kernel_accel_new = (u32) MIN (accel_fit, 1024); // 1024 = max supported; clamp in 64 bit before narrowing
+    }
+  }
+
+  char *new_device_name = hcstrdup (device_param->device_name);
+  // fields in the line written below are separated by blanks, so blanks inside the device name become underscores
+  for (char *c = new_device_name; *c != 0; c++)
+  {
+    if (*c == ' ') *c = '_';
+  }
+
+  lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", new_device_name, user_options->hash_mode, kernel_accel_new);
+
+  hcfree (new_device_name);
+
+  return lines_buf;
+}
+
+u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
+{
+  argon2_options_t *options    = (argon2_options_t *) hashes->esalts_buf;
+  argon2_options_t *options_st = (argon2_options_t *) hashes->st_esalts_buf;
+  // Returns kernel_accel_max times the argon2 memory of one instance; block count falls back to the self-test esalt when unset.
+  const u32 memory_block_count = (options->memory_block_count) ? options->memory_block_count : options_st->memory_block_count;
+  // widen before multiplying: the 32-bit product would wrap once memory_block_count reaches 4Mi blocks (4 GiB)
+  const u64 size_per_accel = (u64) ARGON2_BLOCK_SIZE * memory_block_count;
+
+  const u64 size_argon2 = device_param->kernel_accel_max * size_per_accel;
+
+  return size_argon2;
+}
+
 u64 module_extra_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes)
 {
-  argon2_options_t *options = (argon2_options_t *) hashes->esalts_buf;
+  argon2_options_t *options    = (argon2_options_t *) hashes->esalts_buf;
+  argon2_options_t *options_st = (argon2_options_t *) hashes->st_esalts_buf;
 
-  const u32 memory_block_count = options[0].memory_block_count;
+  const u32 memory_block_count = (options->memory_block_count) ? options->memory_block_count : options_st->memory_block_count;
+  const u32 parallelism        = (options->parallelism)        ? options->parallelism        : options_st->parallelism;
 
-  // we need to check that all hashes have the same memory requirement
   for (u32 i = 1; i < hashes->salts_cnt; i++)
   {
-    if (options[i].memory_block_count != memory_block_count) return (1ULL << 63) + i;
+    if ((memory_block_count != options[i].memory_block_count)
+     || (parallelism        != options[i].parallelism))
+    {
+      return (1ULL << 63) + i;
+    }
   }
 
   // now that we know they all have the same settings, we also need to check the self-test hash is different to what the user hash is using
 
-  if (user_options->self_test == true)
+  if ((hashconfig->opts_type & OPTS_TYPE_SELF_TEST_DISABLE) == 0)
   {
-    argon2_options_t *st_options = (argon2_options_t *) hashes->st_esalts_buf;
-
-    if (st_options[0].memory_block_count != memory_block_count) return (1ULL << 62);
+    if ((memory_block_count != options_st->memory_block_count)
+     || (parallelism        != options_st->parallelism))
+    {
+      return (1ULL << 62);
+    }
   }
 
-  const u64 tmp_size = ARGON2_BLOCK_SIZE * memory_block_count;
+  u64 tmp_size = sizeof (argon2_tmp_t);
 
   return tmp_size;
 }
@@ -148,7 +211,7 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
 
   char *jit_build_options = NULL;
 
-  hc_asprintf (&jit_build_options, "-D ARGON2_PARALLELISM=%" PRIu32 " -D ARGON2_TMP_ELEM=%" PRIu32, options[0].parallelism, options[0].memory_block_count);
+  hc_asprintf (&jit_build_options, "-D ARGON2_PARALLELISM=%u -D ARGON2_TMP_ELEM=%u", options[0].parallelism, options[0].memory_block_count);
 
   return jit_build_options;
 }
@@ -165,12 +228,15 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
 
   token.token_cnt  = 7;
 
-  token.signatures_cnt    = 1;
-  token.signatures_buf[0] = SIGNATURE_ARGON2ID;
+  token.signatures_cnt    = 3;
+  token.signatures_buf[0] = SIGNATURE_ARGON2D;
+  token.signatures_buf[1] = SIGNATURE_ARGON2I;
+  token.signatures_buf[2] = SIGNATURE_ARGON2ID;
 
-  token.len[0]     = 10;
-  token.attr[0]    = TOKEN_ATTR_FIXED_LENGTH
-                   | TOKEN_ATTR_VERIFY_SIGNATURE;
+  token.len_min[0] = 9;
+  token.len_max[0] = 10;
+  token.sep[0]     = 0;
+  token.attr[0]    = TOKEN_ATTR_VERIFY_SIGNATURE;
 
   // version
   token.len[1]     = 4;
@@ -203,8 +269,8 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
                    | TOKEN_ATTR_VERIFY_BASE64A;
 
   // target hash
-  token.len_min[6] = ((SALT_MIN * 8) / 6) + 0;
-  token.len_max[6] = ((SALT_MAX * 8) / 6) + 3;
+  token.len_min[6] = ((  1 * 8) / 6) + 0;
+  token.len_max[6] = ((128 * 8) / 6) + 3;
   token.sep[6]     = '$';
   token.attr[6]    = TOKEN_ATTR_VERIFY_LENGTH
                    | TOKEN_ATTR_VERIFY_BASE64A;
@@ -213,13 +279,23 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
 
   if (rc_tokenizer != PARSER_OK) return (rc_tokenizer);
 
+  // the signature selects the argon2 type
+
+  const int sig_len = token.len[0];
+  const u8 *sig_pos = token.buf[0];
+
+  if      (memcmp (SIGNATURE_ARGON2D,  sig_pos, sig_len) == 0) options->type = 0;
+  else if (memcmp (SIGNATURE_ARGON2I,  sig_pos, sig_len) == 0) options->type = 1;
+  else if (memcmp (SIGNATURE_ARGON2ID, sig_pos, sig_len) == 0) options->type = 2;
+  else
+    return (PARSER_SIGNATURE_UNMATCHED);
+
   // argon2id config
   const u8 *ver_pos = token.buf[1];
   const u8 *mem_pos = token.buf[2];
   const u8 *it_pos  = token.buf[3];
   const u8 *par_pos = token.buf[4];
 
-  options->type                = 2; // Only support for Argon2id
   options->version             = hc_strtoul ((const char *) ver_pos + 2, NULL, 10);
   options->memory_usage_in_kib = hc_strtoul ((const char *) mem_pos + 2, NULL, 10);
   options->iterations          = hc_strtoul ((const char *) it_pos  + 2, NULL, 10);
@@ -259,17 +335,31 @@ int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
 
   // salt
   char base64_salt[512] = { 0 };
-  base64_encode (int_to_base64, (const u8 *) salt->salt_buf, salt->salt_len, (u8 *) base64_salt);
+  int len1 = base64_encode (int_to_base64, (const u8 *) salt->salt_buf, salt->salt_len, (u8 *) base64_salt);
+
+  for (int i = len1 - 1; i >=0; i--) if (base64_salt[i] == '=') base64_salt[i] = 0;
 
   // digest
   char base64_digest[512] = { 0 };
-  base64_encode (int_to_base64, (const u8 *) digest, options->digest_len, (u8 *) base64_digest);
+  int len2 = base64_encode (int_to_base64, (const u8 *) digest, options->digest_len, (u8 *) base64_digest);
+
+  for (int i = len2 - 1; i >=0; i--) if (base64_digest[i] == '=') base64_digest[i] = 0;
 
   // out
+
+  const char *signature = NULL;
+
+  switch (options->type)
+  {
+    case 0: signature = SIGNATURE_ARGON2D;  break;
+    case 1: signature = SIGNATURE_ARGON2I;  break;
+    case 2: signature = SIGNATURE_ARGON2ID; break;
+  }
+
   u8 *out_buf = (u8 *) line_buf;
 
   const int out_len = snprintf ((char *) out_buf, line_size, "%sv=%d$m=%d,t=%d,p=%d$%s$%s",
-    SIGNATURE_ARGON2ID,
+    signature,
     options->version,
     options->memory_usage_in_kib,
     options->iterations,
@@ -303,9 +393,9 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_dgst_size                = module_dgst_size;
   module_ctx->module_dictstat_disable         = MODULE_DEFAULT;
   module_ctx->module_esalt_size               = module_esalt_size;
-  module_ctx->module_extra_buffer_size        = MODULE_DEFAULT;
+  module_ctx->module_extra_buffer_size        = module_extra_buffer_size;
   module_ctx->module_extra_tmp_size           = module_extra_tmp_size;
-  module_ctx->module_extra_tuningdb_block     = MODULE_DEFAULT;
+  module_ctx->module_extra_tuningdb_block     = module_extra_tuningdb_block;
   module_ctx->module_forced_outfile_format    = MODULE_DEFAULT;
   module_ctx->module_hash_binary_count        = MODULE_DEFAULT;
   module_ctx->module_hash_binary_parse        = MODULE_DEFAULT;
@@ -335,8 +425,8 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_jit_cache_disable        = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
-  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
+  module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = module_kernel_threads_min;
   module_ctx->module_kern_type                = module_kern_type;
@@ -359,5 +449,5 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_st_pass                  = module_st_pass;
   module_ctx->module_tmp_size                 = module_tmp_size;
   module_ctx->module_unstable_warning         = MODULE_DEFAULT;
-  module_ctx->module_warmup_disable           = module_warmup_disable;
+  module_ctx->module_warmup_disable           = MODULE_DEFAULT;
 }
diff --git a/src/shared.c b/src/shared.c
index f7dbcd78d..0ff206815 100644
--- a/src/shared.c
+++ b/src/shared.c
@@ -105,8 +105,8 @@ static const char *const OPTI_STR_USES_BITS_16         = "Uses-16-Bit";
 static const char *const OPTI_STR_USES_BITS_32         = "Uses-32-Bit";
 static const char *const OPTI_STR_USES_BITS_64         = "Uses-64-Bit";
 static const char *const OPTI_STR_SLOW_HASH_DIMY_INIT  = "Slow-Hash-DimensionY-INIT";
-static const char *const OPTI_STR_SLOW_HASH_DIMY_COMP  = "Slow-Hash-DimensionY-LOOP";
-static const char *const OPTI_STR_SLOW_HASH_DIMY_LOOP  = "Slow-Hash-DimensionY-COMP";
+static const char *const OPTI_STR_SLOW_HASH_DIMY_LOOP  = "Slow-Hash-DimensionY-LOOP";
+static const char *const OPTI_STR_SLOW_HASH_DIMY_COMP  = "Slow-Hash-DimensionY-COMP";
 
 static const char *const HASH_CATEGORY_UNDEFINED_STR              = "Undefined";
 static const char *const HASH_CATEGORY_RAW_HASH_STR               = "Raw Hash";
@@ -1252,15 +1252,52 @@ int input_tokenizer (const u8 *input_buf, const int input_len, hc_token_t *token
     {
       const int len = token->len[token_idx];
 
-      token->buf[token_idx + 1] = token->buf[token_idx] + len;
-
-      len_left -= len;
-
-      if (token->sep[token_idx] != 0)
+      if (len)
       {
-        token->buf[token_idx + 1]++; // +1 = separator
+        token->buf[token_idx + 1] = token->buf[token_idx] + len;
 
-        len_left--; // -1 = separator
+        len_left -= len;
+
+        if (token->sep[token_idx] != 0)
+        {
+          token->buf[token_idx + 1]++; // +1 = separator
+
+          len_left--; // -1 = separator
+        }
+      }
+
+      const int len_min = token->len_min[token_idx];
+      const int len_max = token->len_max[token_idx];
+
+      if (len_max)
+      {
+        bool matched = false;
+
+        if (token->attr[token_idx] & TOKEN_ATTR_VERIFY_SIGNATURE)
+        {
+          for (int signature_idx = 0; signature_idx < token->signatures_cnt; signature_idx++)
+          {
+            const int len_sig = strlen (token->signatures_buf[signature_idx]);
+
+            if (len_sig > len_left) continue;
+
+            if ((len_sig >= len_min) && (len_sig <= len_max))
+            {
+              if (memcmp (token->buf[token_idx], token->signatures_buf[signature_idx], len_sig) == 0)
+              {
+                token->len[token_idx] = len_sig;
+
+                token->buf[token_idx + 1] = token->buf[token_idx] + len_sig;
+
+                len_left -= len_sig;
+
+                matched = true;
+              }
+            }
+          }
+
+          if (matched == false) return (PARSER_SIGNATURE_UNMATCHED);
+        }
       }
     }
   }
diff --git a/tools/test_modules/m34000.pm b/tools/test_modules/m34000.pm
new file mode 100644
index 000000000..7b2e671ff
--- /dev/null
+++ b/tools/test_modules/m34000.pm
@@ -0,0 +1,79 @@
+#!/usr/bin/env perl
+
+##
+## Author......: See docs/credits.txt
+## License.....: MIT
+##
+
+use strict;
+use warnings;
+
+use MIME::Base64  qw (decode_base64 encode_base64);
+use Crypt::Argon2 qw (argon2_raw);
+
+sub module_constraints { [[0, 256], [32, 32], [-1, -1], [-1, -1], [-1, -1]] }
+
+sub module_generate_hash
+{
+  my $word  = shift;
+  my $salt  = shift;
+  my $sign  = shift // ("argon2d","argon2i","argon2id")[random_number (0, 2)];
+  my $m     = shift // 65536;
+  my $t     = shift // 3;
+  my $p     = shift // 1;
+  my $len   = shift // random_number (1, 2) * 16;
+
+  my $salt_bin = pack ("H*", $salt);
+
+  my $digest_bin = argon2_raw ($sign, $word, $salt_bin, $t, $m . "k", $p, $len);
+
+  my $salt_base64   = encode_base64 ($salt_bin,   ""); $salt_base64   =~ s/=+$//;
+  my $digest_base64 = encode_base64 ($digest_bin, ""); $digest_base64 =~ s/=+$//;
+
+  my $hash = sprintf ('$%s$v=19$m=%d,t=%d,p=%d$%s$%s', $sign, $m, $t, $p, $salt_base64, $digest_base64);
+
+  return $hash;
+}
+
+sub module_verify_hash
+{
+  my $line = shift;
+
+  my $idx = index ($line, ':');
+
+  return unless $idx >= 0;
+
+  my $hash = substr ($line, 0, $idx);
+  my $word = substr ($line, $idx + 1);
+
+  return unless ((substr ($hash, 0,  9) eq '$argon2d$')
+              || (substr ($hash, 0,  9) eq '$argon2i$')
+              || (substr ($hash, 0, 10) eq '$argon2id$'));              
+
+  my (undef, $signature, $version, $config, $salt, $digest) = split '\$', $hash;
+
+  return unless defined $signature;
+  return unless defined $version;
+  return unless defined $config;
+  return unless defined $salt;
+  return unless defined $digest;
+
+  my ($m_config, $t_config, $p_config) = split ("\,", $config);
+
+  return unless ($version eq "v=19");
+
+  my $m = (split ("=", $m_config))[1];
+  my $t = (split ("=", $t_config))[1];
+  my $p = (split ("=", $p_config))[1];
+
+  $salt   = decode_base64 ($salt);
+  $digest = decode_base64 ($digest);
+
+  my $word_packed = pack_if_HEX_notation ($word);
+
+  my $new_hash = module_generate_hash ($word_packed, unpack ("H*", $salt), $signature, $m, $t, $p, length ($digest));
+
+  return ($new_hash, $word);
+}
+
+1;

From 8a91fccefd8811e8d1c02d04d98675ca507bec4e Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Wed, 2 Jul 2025 22:19:39 +0200
Subject: [PATCH 60/83] porting to metal and fix OpenCL bug on hc__shfl

---
 OpenCL/inc_hash_argon2.cl | 123 +++++++++++++++++++++-----------------
 OpenCL/inc_hash_argon2.h  |  38 +++++++-----
 OpenCL/m34000-pure.cl     |   6 +-
 3 files changed, 98 insertions(+), 69 deletions(-)

diff --git a/OpenCL/inc_hash_argon2.cl b/OpenCL/inc_hash_argon2.cl
index dc575f8da..c87179d2c 100644
--- a/OpenCL/inc_hash_argon2.cl
+++ b/OpenCL/inc_hash_argon2.cl
@@ -12,7 +12,7 @@
 #include "inc_hash_blake2b.h"
 #include "inc_hash_argon2.h"
 
-DECLSPEC void argon2_initial_block (const u32 *in, const u32 lane, const u32 blocknum, const u32 parallelism, GLOBAL_AS argon2_block_t *blocks)
+DECLSPEC void argon2_initial_block (PRIVATE_AS const u32 *in, const u32 lane, const u32 blocknum, const u32 parallelism, GLOBAL_AS argon2_block_t *blocks)
 {
   blake2b_ctx_t ctx;
 
@@ -22,12 +22,12 @@ DECLSPEC void argon2_initial_block (const u32 *in, const u32 lane, const u32 blo
 
   blake_buf[0] = sizeof(argon2_block_t);
 
-  blake2b_update (&ctx, (u32 *) blake_buf, 4);
+  blake2b_update (&ctx, (PRIVATE_AS u32 *) blake_buf, 4);
   blake2b_update (&ctx, in, 64);
 
   blake_buf[0] = hl32_to_64 (lane, blocknum);
 
-  blake2b_update (&ctx, (u32 *) blake_buf, 8);
+  blake2b_update (&ctx, (PRIVATE_AS u32 *) blake_buf, 8);
 
   blake2b_final (&ctx);
 
@@ -57,7 +57,7 @@ DECLSPEC void argon2_initial_block (const u32 *in, const u32 lane, const u32 blo
   out[127] = ctx.h[7];
 }
 
-DECLSPEC void argon2_initial_hash (GLOBAL_AS const pw_t *pw, GLOBAL_AS const salt_t *salt, const argon2_options_t *options, u64 *blockhash)
+DECLSPEC void argon2_initial_hash (GLOBAL_AS const pw_t *pw, GLOBAL_AS const salt_t *salt, PRIVATE_AS const argon2_options_t *options, PRIVATE_AS u64 *blockhash)
 {
   blake2b_ctx_t ctx;
   blake2b_init (&ctx);
@@ -96,7 +96,7 @@ DECLSPEC void argon2_initial_hash (GLOBAL_AS const pw_t *pw, GLOBAL_AS const sal
 }
 
 DECLSPEC void argon2_init (GLOBAL_AS const pw_t *pw, GLOBAL_AS const salt_t *salt,
-                           const argon2_options_t *options, GLOBAL_AS argon2_block_t *out)
+                           PRIVATE_AS const argon2_options_t *options, GLOBAL_AS argon2_block_t *out)
 {
   u64 blockhash[16] = { 0 };
 
@@ -105,8 +105,8 @@ DECLSPEC void argon2_init (GLOBAL_AS const pw_t *pw, GLOBAL_AS const salt_t *sal
   // Generate the first two blocks of each lane
   for (u32 lane = 0; lane < options->parallelism; lane++)
   {
-    argon2_initial_block ((u32 *) blockhash, lane, 0, options->parallelism, out);
-    argon2_initial_block ((u32 *) blockhash, lane, 1, options->parallelism, out);
+    argon2_initial_block ((PRIVATE_AS u32 *) blockhash, lane, 0, options->parallelism, out);
+    argon2_initial_block ((PRIVATE_AS u32 *) blockhash, lane, 1, options->parallelism, out);
   }
 }
 
@@ -118,11 +118,11 @@ DECLSPEC u64 trunc_mul (u64 x, u64 y)
   return hl32_to_64_S (hc_umulhi (xlo, ylo), (u32) (xlo * ylo));
 }
 
-DECLSPEC inline u32 argon2_ref_address (const argon2_options_t *options, const argon2_pos_t *pos, u32 index, u64 pseudo_random)
+DECLSPEC inline u32 argon2_ref_address (PRIVATE_AS const argon2_options_t *options, PRIVATE_AS const argon2_pos_t *pos, u32 index, u64 pseudo_random)
 {
-  u32 ref_lane;
-  u32 ref_area;
-  u32 ref_index;
+  u32 ref_lane = 0;
+  u32 ref_area = 0;
+  u32 ref_index = 0;
 
   if ((pos->pass == 0) && (pos->slice == 0))
   {
@@ -134,6 +134,7 @@ DECLSPEC inline u32 argon2_ref_address (const argon2_options_t *options, const a
   }
 
   ref_area  = (pos->pass == 0) ? pos->slice : (ARGON2_SYNC_POINTS - 1);
+
   ref_area *= options->segment_length;
 
   if ((ref_lane == pos->lane) || (index == 0))
@@ -141,7 +142,10 @@ DECLSPEC inline u32 argon2_ref_address (const argon2_options_t *options, const a
       ref_area += (index - 1);
   }
 
+  // if ref_area == 0xFFFFFFFF => bug
+
   const u32 j1 = l32_from_64_S (pseudo_random);
+
   ref_index = (ref_area - 1 - hc_umulhi (ref_area, hc_umulhi (j1, j1)));
 
   if (pos->pass > 0)
@@ -157,68 +161,79 @@ DECLSPEC inline u32 argon2_ref_address (const argon2_options_t *options, const a
   return (options->parallelism * ref_index) + ref_lane;
 }
 
-DECLSPEC void swap_u64 (u64 *x, u64 *y)
+DECLSPEC void swap_u64 (PRIVATE_AS u64 *x, PRIVATE_AS u64 *y)
 {
   u64 tmp = *x;
   *x = *y;
   *y = tmp;
 }
 
-DECLSPEC void transpose_permute_block (u64 R[4], int thread)
+DECLSPEC void transpose_permute_block (u64 R[4], int argon2_thread)
 {
-  if (thread & 0x08)
+  if (argon2_thread & 0x08)
   {
     swap_u64 (&R[0], &R[2]);
     swap_u64 (&R[1], &R[3]);
   }
-  if (thread & 0x04)
+  if (argon2_thread & 0x04)
   {
     swap_u64 (&R[0], &R[1]);
     swap_u64 (&R[2], &R[3]);
   }
 }
 
-DECLSPEC int argon2_shift (int idx, int thread)
+DECLSPEC int argon2_shift (int idx, int argon2_thread)
 {
   const int delta = ((idx & 0x02) << 3) + (idx & 0x01);
-  return (thread & 0x0e) | (((thread & 0x11) + delta + 0x0e) & 0x11);
+  return (argon2_thread & 0x0e) | (((argon2_thread & 0x11) + delta + 0x0e) & 0x11);
 }
 
-DECLSPEC void argon2_hash_block (u64 R[4], int thread, LOCAL_AS u64 *shuffle_buf)
+DECLSPEC void argon2_hash_block (u64 R[4], int argon2_thread, LOCAL_AS u64 *shuffle_buf, int argon2_lsz)
 {
-  for (u32 idx = 1; idx < 4; idx++) R[idx] = hc__shfl_sync (shuffle_buf, FULL_MASK, R[idx], thread ^ (idx << 2));
+  for (u32 idx = 1; idx < 4; idx++) R[idx] = hc__shfl_sync (shuffle_buf, FULL_MASK, R[idx], argon2_thread ^ (idx << 2), argon2_thread, argon2_lsz);
 
-  transpose_permute_block (R, thread);
+  transpose_permute_block (R, argon2_thread);
 
-  for (u32 idx = 1; idx < 4; idx++) R[idx] = hc__shfl_sync (shuffle_buf, FULL_MASK, R[idx], thread ^ (idx << 2));
+  for (u32 idx = 1; idx < 4; idx++) R[idx] = hc__shfl_sync (shuffle_buf, FULL_MASK, R[idx], argon2_thread ^ (idx << 2), argon2_thread, argon2_lsz);
 
   ARGON2_G(R[0], R[1], R[2], R[3]);
 
-  for (u32 idx = 1; idx < 4; idx++) R[idx] = hc__shfl_sync (shuffle_buf, FULL_MASK, R[idx],  (thread & 0x1c) | ((thread + idx) & 0x03));
+  for (u32 idx = 1; idx < 4; idx++) R[idx] = hc__shfl_sync (shuffle_buf, FULL_MASK, R[idx],  (argon2_thread & 0x1c) | ((argon2_thread + idx) & 0x03), argon2_thread, argon2_lsz);
 
   ARGON2_G(R[0], R[1], R[2], R[3]);
 
-  for (u32 idx = 1; idx < 4; idx++) R[idx] = hc__shfl_sync (shuffle_buf, FULL_MASK, R[idx], ((thread & 0x1c) | ((thread - idx) & 0x03)) ^ (idx << 2));
+  for (u32 idx = 1; idx < 4; idx++) R[idx] = hc__shfl_sync (shuffle_buf, FULL_MASK, R[idx], ((argon2_thread & 0x1c) | ((argon2_thread - idx) & 0x03)) ^ (idx << 2), argon2_thread, argon2_lsz);
 
-  transpose_permute_block (R, thread);
+  transpose_permute_block (R, argon2_thread);
 
-  for (u32 idx = 1; idx < 4; idx++) R[idx] = hc__shfl_sync (shuffle_buf, FULL_MASK, R[idx], thread ^ (idx << 2));
+  for (u32 idx = 1; idx < 4; idx++) R[idx] = hc__shfl_sync (shuffle_buf, FULL_MASK, R[idx], argon2_thread ^ (idx << 2), argon2_thread, argon2_lsz);
 
   ARGON2_G(R[0], R[1], R[2], R[3]);
 
-  for (u32 idx = 1; idx < 4; idx++) R[idx] = hc__shfl_sync (shuffle_buf, FULL_MASK, R[idx], argon2_shift (idx, thread));
+  for (u32 idx = 1; idx < 4; idx++) R[idx] = hc__shfl_sync (shuffle_buf, FULL_MASK, R[idx], argon2_shift (idx, argon2_thread), argon2_thread, argon2_lsz);
 
   ARGON2_G(R[0], R[1], R[2], R[3]);
 
-  for (u32 idx = 1; idx < 4; idx++) R[idx] = hc__shfl_sync (shuffle_buf, FULL_MASK, R[idx], argon2_shift ((4 - idx), thread));
+  for (u32 idx = 1; idx < 4; idx++) R[idx] = hc__shfl_sync (shuffle_buf, FULL_MASK, R[idx], argon2_shift ((4 - idx), argon2_thread), argon2_thread, argon2_lsz);
 }
 
-DECLSPEC void argon2_next_addresses (const argon2_options_t *options, const argon2_pos_t *pos, u32 *addresses, u32 start_index, u32 thread, LOCAL_AS u64 *shuffle_buf)
+DECLSPEC void argon2_next_addresses (PRIVATE_AS const argon2_options_t *options, PRIVATE_AS const argon2_pos_t *pos, PRIVATE_AS u32 *addresses, u32 start_index, u32 argon2_thread, LOCAL_AS u64 *shuffle_buf, u32 argon2_lsz)
 {
-  u64 Z[4] = { 0 };
+  u64 Z[4];
+
+  Z[0] = 0;
+  Z[1] = 0;
+  Z[2] = 0;
+  Z[3] = 0;
+
   u64 tmp[4];
 
-  switch (thread)
+  tmp[0] = 0;
+  tmp[1] = 0;
+  tmp[2] = 0;
+  tmp[3] = 0;
+
+  switch (argon2_thread)
   {
     case 0:  Z[0] = pos->pass;                   break;
     case 1:  Z[0] = pos->lane;                   break;
@@ -232,20 +247,22 @@ DECLSPEC void argon2_next_addresses (const argon2_options_t *options, const argo
 
   tmp[0] = Z[0];
 
-  argon2_hash_block (Z, thread, shuffle_buf);
+  argon2_hash_block (Z, argon2_thread, shuffle_buf, argon2_lsz);
 
   Z[0]  ^= tmp[0];
 
   for (u32 idx = 0; idx < 4; idx++) tmp[idx] = Z[idx];
 
-  argon2_hash_block (Z, thread, shuffle_buf);
+  argon2_hash_block (Z, argon2_thread, shuffle_buf, argon2_lsz);
 
   for (u32 idx = 0; idx < 4; idx++) Z[idx]  ^= tmp[idx];
 
-  for (u32 i = 0, index = (start_index + thread); i < 4; i++, index += THREADS_PER_LANE)
+  for (u32 i = 0, index = (start_index + argon2_thread); i < 4; i++, index += THREADS_PER_LANE)
   {
     addresses[i] = argon2_ref_address (options, pos, index, Z[i]);
   }
+
+  // if addresses[0] == 0xFFFFFFFE => bug
 }
 
 DECLSPEC u32 index_u32x4 (const u32 array[4], u32 index)
@@ -265,20 +282,20 @@ DECLSPEC u32 index_u32x4 (const u32 array[4], u32 index)
   return -1;
 }
 
-DECLSPEC GLOBAL_AS argon2_block_t *argon2_get_current_block (GLOBAL_AS argon2_block_t *blocks, const argon2_options_t *options, u32 lane, u32 index_in_lane, u64 R[4], u32 thread)
+DECLSPEC GLOBAL_AS argon2_block_t *argon2_get_current_block (GLOBAL_AS argon2_block_t *blocks, PRIVATE_AS const argon2_options_t *options, u32 lane, u32 index_in_lane, u64 R[4], u32 argon2_thread)
 {
   // Apply wrap-around to previous block index if the current block is the first block in the lane
   const u32 prev_in_lane = (index_in_lane == 0) ? (options->lane_length - 1) : (index_in_lane - 1);
 
   GLOBAL_AS argon2_block_t *prev_block = &blocks[(prev_in_lane * options->parallelism) + lane];
 
-  for (u32 idx = 0; idx < 4; idx++) R[idx] = prev_block->values[(idx * THREADS_PER_LANE) + thread];
+  for (u32 idx = 0; idx < 4; idx++) R[idx] = prev_block->values[(idx * THREADS_PER_LANE) + argon2_thread];
 
   return &blocks[(index_in_lane * options->parallelism) + lane];
 }
 
-DECLSPEC void argon2_fill_subsegment (GLOBAL_AS argon2_block_t *blocks, const argon2_options_t *options, const argon2_pos_t *pos, bool indep_addr, const u32 addresses[4],
-                                      u32 start_index, u32 end_index, GLOBAL_AS argon2_block_t *cur_block, u64 R[4], u32 thread, LOCAL_AS u64 *shuffle_buf)
+DECLSPEC void argon2_fill_subsegment (GLOBAL_AS argon2_block_t *blocks, PRIVATE_AS const argon2_options_t *options, PRIVATE_AS const argon2_pos_t *pos, bool indep_addr, const u32 addresses[4],
+                                      u32 start_index, u32 end_index, GLOBAL_AS argon2_block_t *cur_block, u64 R[4], u32 argon2_thread, LOCAL_AS u64 *shuffle_buf, u32 argon2_lsz)
 {
   for (u32 index = start_index; index < end_index; index++, cur_block += options->parallelism)
   {
@@ -287,12 +304,12 @@ DECLSPEC void argon2_fill_subsegment (GLOBAL_AS argon2_block_t *blocks, const ar
     if (indep_addr)
     {
       ref_address = index_u32x4 (addresses, (index / THREADS_PER_LANE) % ARGON2_SYNC_POINTS);
-      ref_address = hc__shfl_sync (shuffle_buf, FULL_MASK, ref_address, index);
+      ref_address = hc__shfl_sync (shuffle_buf, FULL_MASK, ref_address, index, argon2_thread, argon2_lsz);
     }
     else
     {
       ref_address = argon2_ref_address (options, pos, index, R[0]);
-      ref_address = hc__shfl_sync (shuffle_buf, FULL_MASK, ref_address, 0);
+      ref_address = hc__shfl_sync (shuffle_buf, FULL_MASK, ref_address, 0, argon2_thread, argon2_lsz);
     }
 
     GLOBAL_AS const argon2_block_t *ref_block = &blocks[ref_address];
@@ -302,32 +319,30 @@ DECLSPEC void argon2_fill_subsegment (GLOBAL_AS argon2_block_t *blocks, const ar
     // First pass is overwrite, next passes are XOR with previous
     if ((pos->pass > 0) && (options->version != ARGON2_VERSION_10))
     {
-      for (u32 idx = 0; idx < 4; idx++) tmp[idx]  = cur_block->values[(idx * THREADS_PER_LANE) + thread];
+      for (u32 idx = 0; idx < 4; idx++) tmp[idx]  = cur_block->values[(idx * THREADS_PER_LANE) + argon2_thread];
     }
 
-    for (u32 idx = 0; idx < 4; idx++) R[idx]   ^= ref_block->values[(idx * THREADS_PER_LANE) + thread];
+    for (u32 idx = 0; idx < 4; idx++) R[idx]   ^= ref_block->values[(idx * THREADS_PER_LANE) + argon2_thread];
 
     for (u32 idx = 0; idx < 4; idx++) tmp[idx] ^= R[idx];
 
-    argon2_hash_block (R, thread, shuffle_buf);
+    argon2_hash_block (R, argon2_thread, shuffle_buf, argon2_lsz);
 
     for (u32 idx = 0; idx < 4; idx++) R[idx]   ^= tmp[idx];
 
-    for (u32 idx = 0; idx < 4; idx++) cur_block->values[(idx * THREADS_PER_LANE) + thread] = R[idx];
+    for (u32 idx = 0; idx < 4; idx++) cur_block->values[(idx * THREADS_PER_LANE) + argon2_thread] = R[idx];
   }
 }
 
-DECLSPEC void argon2_fill_segment (GLOBAL_AS argon2_block_t *blocks, const argon2_options_t *options, const argon2_pos_t *pos, LOCAL_AS u64 *shuffle_buf)
+DECLSPEC void argon2_fill_segment (GLOBAL_AS argon2_block_t *blocks, PRIVATE_AS const argon2_options_t *options, PRIVATE_AS const argon2_pos_t *pos, LOCAL_AS u64 *shuffle_buf, const u32 argon2_thread, const u32 argon2_lsz)
 {
-  const u32  thread       = get_local_id(0);
-
   // We have already generated the first two blocks of each lane (for the first pass)
   const u32 skip_blocks   = (pos->pass == 0) && (pos->slice == 0) ? 2 : 0;
   const u32 index_in_lane = (pos->slice * options->segment_length) + skip_blocks;
 
   u64 R[4];
 
-  GLOBAL_AS argon2_block_t *cur_block = argon2_get_current_block (blocks, options, pos->lane, index_in_lane, R, thread);
+  GLOBAL_AS argon2_block_t *cur_block = argon2_get_current_block (blocks, options, pos->lane, index_in_lane, R, argon2_thread);
 
   if ((options->type == TYPE_I) || ((options->type == TYPE_ID) && (pos->pass == 0) && (pos->slice <= 1)))
   {
@@ -336,10 +351,10 @@ DECLSPEC void argon2_fill_segment (GLOBAL_AS argon2_block_t *blocks, const argon
       const u32 start_index = (block_index == 0) ? skip_blocks : block_index;
       const u32 end_index   = MIN(((start_index | 127) + 1), options->segment_length);
 
-      u32 addresses[4];
+      u32 addresses[4] = { 0, 0, 0, 0 };
 
-      argon2_next_addresses (options, pos, addresses, block_index, thread, shuffle_buf);
-      argon2_fill_subsegment (blocks, options, pos, true, addresses, start_index, end_index, cur_block, R, thread, shuffle_buf);
+      argon2_next_addresses (options, pos, addresses, block_index, argon2_thread, shuffle_buf, argon2_lsz);
+      argon2_fill_subsegment (blocks, options, pos, true, addresses, start_index, end_index, cur_block, R, argon2_thread, shuffle_buf, argon2_lsz);
 
       cur_block += (end_index - start_index) * options->parallelism;
     }
@@ -348,11 +363,11 @@ DECLSPEC void argon2_fill_segment (GLOBAL_AS argon2_block_t *blocks, const argon
   {
     u32 addresses[4] = { 0 };
 
-    argon2_fill_subsegment (blocks, options, pos, false, addresses, skip_blocks, options->segment_length, cur_block, R, thread, shuffle_buf);
+    argon2_fill_subsegment (blocks, options, pos, false, addresses, skip_blocks, options->segment_length, cur_block, R, argon2_thread, shuffle_buf, argon2_lsz);
   }
 }
 
-DECLSPEC void argon2_final (GLOBAL_AS argon2_block_t *blocks, const argon2_options_t *options, u32 *out)
+DECLSPEC void argon2_final (GLOBAL_AS argon2_block_t *blocks, PRIVATE_AS const argon2_options_t *options, PRIVATE_AS u32 *out)
 {
   const u32 lane_length = options->lane_length;
   const u32 lanes = options->parallelism;
@@ -374,11 +389,11 @@ DECLSPEC void argon2_final (GLOBAL_AS argon2_block_t *blocks, const argon2_optio
   ctx.h[0] ^= 0x40 ^ options->digest_len; 
 
   blake2b_update (&ctx, output_len, 4);
-  blake2b_update (&ctx, (u32 *) final_block.values, sizeof(final_block));
+  blake2b_update (&ctx, (PRIVATE_AS u32 *) final_block.values, sizeof(final_block));
 
   blake2b_final (&ctx);
 
-  for (int i = 0, idx = 0; i < (options->digest_len / 4); i += 2, idx += 1)
+  for (uint i = 0, idx = 0; i < (options->digest_len / 4); i += 2, idx += 1)
   {
     out [i + 0] = l32_from_64_S (ctx.h[idx]);
     out [i + 1] = h32_from_64_S (ctx.h[idx]);
diff --git a/OpenCL/inc_hash_argon2.h b/OpenCL/inc_hash_argon2.h
index 922afc44a..a03b7c480 100644
--- a/OpenCL/inc_hash_argon2.h
+++ b/OpenCL/inc_hash_argon2.h
@@ -1,3 +1,4 @@
+
 /**
  * Author......: Netherlands Forensic Institute
  * License.....: MIT
@@ -23,15 +24,15 @@
 #define TYPE_ID 2
 
 #if defined IS_CUDA
-#define hc__shfl_sync(shfbuf,mask,var,srcLane) __shfl_sync ((mask),(var),(srcLane))
+#define hc__shfl_sync(shfbuf,mask,var,srcLane,argon2_thread,argon2_lsz) __shfl_sync ((mask),(var),(srcLane))
 #elif defined IS_HIP
 // attention hard coded 32 warps for hip here
-#define hc__shfl_sync(shfbuf,mask,var,srcLane) __shfl ((var),(srcLane),32)
+#define hc__shfl_sync(shfbuf,mask,var,srcLane,argon2_thread,argon2_lsz) __shfl ((var),(srcLane),32)
 #elif defined IS_OPENCL
-#define hc__shfl_sync(shfbuf,mask,var,srcLane) hc__shfl ((shfbuf),(var),(srcLane))
+#define hc__shfl_sync(shfbuf,mask,var,srcLane,argon2_thread,argon2_lsz) hc__shfl ((shfbuf),(var),(srcLane),(argon2_thread),(argon2_lsz))
 
 #if defined IS_AMD && defined IS_GPU
-DECLSPEC u64 hc__shfl (MAYBE_UNUSED LOCAL_AS u64 *shuffle_buf, const u64 var, const int src_lane)
+DECLSPEC u64 hc__shfl (MAYBE_UNUSED LOCAL_AS u64 *shuffle_buf, const u64 var, const int src_lane, const u32 argon2_thread, const u32 argon2_lsz)
 {
   const u32 idx = src_lane << 2;
 
@@ -46,7 +47,7 @@ DECLSPEC u64 hc__shfl (MAYBE_UNUSED LOCAL_AS u64 *shuffle_buf, const u64 var, co
   return out;
 }
 #elif defined IS_NV && defined IS_GPU
-DECLSPEC u64 hc__shfl (MAYBE_UNUSED LOCAL_AS u64 *shuffle_buf, const u64 var, const int src_lane)
+DECLSPEC u64 hc__shfl (MAYBE_UNUSED LOCAL_AS u64 *shuffle_buf, const u64 var, const int src_lane, const u32 argon2_thread, const u32 argon2_lsz)
 {
   const u32 l32 = l32_from_64_S (var);
   const u32 h32 = h32_from_64_S (var);
@@ -67,22 +68,31 @@ DECLSPEC u64 hc__shfl (MAYBE_UNUSED LOCAL_AS u64 *shuffle_buf, const u64 var, co
   return out;
 }
 #else
-DECLSPEC u64 hc__shfl (MAYBE_UNUSED LOCAL_AS u64 *shuffle_buf, const u64 var, const int src_lane)
+DECLSPEC u64 hc__shfl (MAYBE_UNUSED LOCAL_AS u64 *shuffle_buf, const u64 var, const int src_lane, const u32 argon2_thread, const u32 argon2_lsz)
 {
-  const u32 lid = get_local_id (0);
-
-  shuffle_buf[lid] = var;
+  shuffle_buf[argon2_thread] = var;
 
   barrier (CLK_LOCAL_MEM_FENCE);
 
-  const u64 out = shuffle_buf[src_lane & 31];
+  const u64 out = shuffle_buf[src_lane & (argon2_lsz - 1)];
 
   return out;
 }
 #endif
 
 #elif defined IS_METAL
-//todo
+#define hc__shfl_sync(shfbuf,mask,var,srcLane,argon2_thread,argon2_lsz) hc__shfl ((shfbuf),(var),(srcLane),(argon2_thread),(argon2_lsz))
+
+DECLSPEC u64 hc__shfl (LOCAL_AS u64 *shuffle_buf, const u64 var, const int src_lane, const u32 argon2_thread, const u32 argon2_lsz)
+{
+  shuffle_buf[argon2_thread] = var;
+
+  SYNC_THREADS();
+
+  const u64 out = shuffle_buf[src_lane & (argon2_lsz - 1)];
+
+  return out;
+}
 #endif
 
 #define ARGON2_G(a,b,c,d)                \
@@ -140,8 +150,8 @@ typedef struct argon2_pos
 
 } argon2_pos_t;
 
-DECLSPEC void argon2_init (GLOBAL_AS const pw_t *pw, GLOBAL_AS const salt_t *salt, const argon2_options_t *options, GLOBAL_AS argon2_block_t *out);
-DECLSPEC void argon2_fill_segment (GLOBAL_AS argon2_block_t *blocks, const argon2_options_t *options, const argon2_pos_t *pos, LOCAL_AS u64 *shuffle_buf);
-DECLSPEC void argon2_final (GLOBAL_AS argon2_block_t *blocks, const argon2_options_t *options, u32 *out);
+DECLSPEC void argon2_init (GLOBAL_AS const pw_t *pw, GLOBAL_AS const salt_t *salt, PRIVATE_AS const argon2_options_t *options, GLOBAL_AS argon2_block_t *out);
+DECLSPEC void argon2_fill_segment (GLOBAL_AS argon2_block_t *blocks, PRIVATE_AS const argon2_options_t *options, PRIVATE_AS const argon2_pos_t *pos, LOCAL_AS u64 *shuffle_buf, const u32 argon2_thread, const u32 argon2_lsz);
+DECLSPEC void argon2_final (GLOBAL_AS argon2_block_t *blocks, PRIVATE_AS const argon2_options_t *options, PRIVATE_AS u32 *out);
 
 #endif // INC_HASH_ARGON2_H
diff --git a/OpenCL/m34000-pure.cl b/OpenCL/m34000-pure.cl
index 483adf59c..6fba3590c 100644
--- a/OpenCL/m34000-pure.cl
+++ b/OpenCL/m34000-pure.cl
@@ -1,3 +1,4 @@
+
 /**
  * Author......: Netherlands Forensic Institute
  * License.....: MIT
@@ -63,6 +64,9 @@ KERNEL_FQ KERNEL_FA void m34000_loop (KERN_ATTR_TMPS_ESALT (argon2_tmp_t, argon2
 
   if (bid >= GID_CNT) return;
 
+  const u32 argon2_thread = get_local_id (0);
+  const u32 argon2_lsz = get_local_size (0);
+
   LOCAL_VK u64 shuffle_bufs[ARGON2_PARALLELISM][32];
   LOCAL_AS u64 *shuffle_buf = shuffle_bufs[lid];
 
@@ -94,7 +98,7 @@ KERNEL_FQ KERNEL_FA void m34000_loop (KERN_ATTR_TMPS_ESALT (argon2_tmp_t, argon2
   {
     for (pos.lane = lid; pos.lane < options.parallelism; pos.lane += lsz)
     {
-      argon2_fill_segment (argon2_extra->blocks, &options, &pos, shuffle_buf);
+      argon2_fill_segment (argon2_extra->blocks, &options, &pos, shuffle_buf, argon2_thread, argon2_lsz);
     }
 
     SYNC_THREADS ();

From e8cf8bd146a8678c0d85b6b63180a61cf7a3da99 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Thu, 3 Jul 2025 08:10:30 +0200
Subject: [PATCH 61/83] Fix OpenCL spawning unnecessary work-item due to
 redundant multiplication in new OPTS_TYPE_THREAD_MULTI_DISABLE mode. Prepare
 Metal section in run_kernel() for 2D kernel invocation related to new
 salt->salt_dimy variable. Move reusable Argon2 module code into separate file
 argon2_common.c, similar to scrypt_common.c, and update headers. Update
 existing hash mode 34000 to use argon2_common.c.

---
 src/backend.c               |  43 +++++++--
 src/modules/argon2_common.c | 174 +++++++++++++++++++++++++++++++++++
 src/modules/module_34000.c  | 175 ++----------------------------------
 3 files changed, 221 insertions(+), 171 deletions(-)
 create mode 100644 src/modules/argon2_common.c

diff --git a/src/backend.c b/src/backend.c
index c10cf769f..74b6b96e9 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -2860,7 +2860,10 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
 
     if (kernel_threads == 0) kernel_threads = 1;
 
-    num_elements = round_up_multiple_32 (num_elements, kernel_threads);
+    if ((hashconfig->opts_type & OPTS_TYPE_THREAD_MULTI_DISABLE) == 0)
+    {
+      num_elements = round_up_multiple_32 (num_elements, kernel_threads);
+    }
 
     if (kern_run == KERN_RUN_1)
     {
@@ -2898,10 +2901,37 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
       }
     }
 
-    num_elements = round_up_multiple_32 (num_elements, kernel_threads);
+    if ((hashconfig->opts_type & OPTS_TYPE_THREAD_MULTI_DISABLE) == 0)
+    {
+      num_elements = round_up_multiple_32 (num_elements, kernel_threads);
+    }
+    else
+    {
+      num_elements = num_elements * kernel_threads;
+    }
 
-    const size_t global_work_size[3] = { num_elements,   1, 1 };
-    const size_t local_work_size[3]  = { kernel_threads, 1, 1 };
+    size_t global_work_size[3] = { num_elements,   1, 1 };
+    size_t local_work_size[3]  = { kernel_threads, 1, 1 };
+
+    cl_uint work_dim = 1;
+
+    if ((hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_DIMY_INIT) && (kern_run == KERN_RUN_1))
+    {
+      global_work_size[1] = local_work_size[1] = hashcat_ctx->hashes->salts_buf->salt_dimy;
+      work_dim = 2;
+    }
+
+    if ((hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_DIMY_LOOP) && (kern_run == KERN_RUN_2))
+    {
+      global_work_size[1] = local_work_size[1] = hashcat_ctx->hashes->salts_buf->salt_dimy;
+      work_dim = 2;
+    }
+
+    if ((hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_DIMY_COMP) && (kern_run == KERN_RUN_3))
+    {
+      global_work_size[1] = local_work_size[1] = hashcat_ctx->hashes->salts_buf->salt_dimy;
+      work_dim = 2;
+    }
 
     double ms = 0;
 
@@ -2997,7 +3027,10 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
     }
     */
 
-    num_elements = round_up_multiple_64 (num_elements, kernel_threads);
+    if ((hashconfig->opts_type & OPTS_TYPE_THREAD_MULTI_DISABLE) == 0)
+    {
+      num_elements = round_up_multiple_64 (num_elements, kernel_threads);
+    }
 
     cl_event opencl_event;
 
diff --git a/src/modules/argon2_common.c b/src/modules/argon2_common.c
new file mode 100644
index 000000000..cfe47071e
--- /dev/null
+++ b/src/modules/argon2_common.c
@@ -0,0 +1,174 @@
+/**
+ * Author......: Netherlands Forensic Institute
+ * License.....: MIT
+ */
+
+#include <inttypes.h>
+#include "common.h"
+#include "types.h"
+#include "modules.h"
+#include "bitops.h"
+#include "convert.h"
+#include "shared.h"
+#include "memory.h"
+
+#define ARGON2_SYNC_POINTS  4
+#define ARGON2_BLOCK_SIZE   1024
+
+typedef struct argon2_tmp
+{
+  u32 state[4]; // just something for now
+
+} argon2_tmp_t;
+
+typedef struct argon2_options
+{
+  u32 type;
+  u32 version;
+
+  u32 iterations;
+  u32 parallelism;
+  u32 memory_usage_in_kib;
+
+  u32 segment_length;
+  u32 lane_length;
+  u32 memory_block_count;
+
+  u32 digest_len;
+
+} argon2_options_t;
+
+u32 argon2_module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_min = 32; // hard-coded in kernel
+
+  return kernel_threads_min;
+}
+
+u32 argon2_module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_threads_max = 32; // hard-coded in kernel
+
+  return kernel_threads_max;
+}
+
+u64 argon2_module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u64 tmp_size = 0; // we'll add some later
+
+  return tmp_size;
+}
+
+const char *argon2_module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel_user)
+{
+  // Builds one tuning-db line "<device_name> * <hash_mode> 1 <kernel_accel> A\n" and returns it in a buffer allocated with hcmalloc.
+  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
+
+  argon2_options_t *options    = (argon2_options_t *) hashes->esalts_buf;
+  argon2_options_t *options_st = (argon2_options_t *) hashes->st_esalts_buf;
+
+  const u32 memory_block_count = (options->memory_block_count) ? options->memory_block_count : options_st->memory_block_count;
+  // widen before multiplying: int * u32 is a 32-bit product and wraps at 2^22 blocks (4 GiB)
+  const u64 size_per_accel = (u64) ARGON2_BLOCK_SIZE * memory_block_count;
+
+  int   lines_sz  = 4096;
+  char *lines_buf = hcmalloc (lines_sz);
+  int   lines_pos = 0;
+
+  const u32 device_processors        = device_param->device_processors;
+  const u32 device_maxworkgroup_size = device_param->device_maxworkgroup_size;
+
+  const u64 fixed_mem = (256 * 1024 * 1024); // some storage we need for pws[], tmps[], and others. Is around 72MiB in reality.
+  const u64 spill_mem = 2048 * device_processors * device_maxworkgroup_size; // 1600 according to ptxas
+
+  const u64 usable_mem = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4));
+  // saturate at zero: the unsigned subtraction would wrap when the device has less memory than fixed_mem + spill_mem
+  const u64 available_mem = (usable_mem > (fixed_mem + spill_mem)) ? (usable_mem - (fixed_mem + spill_mem)) : 0;
+
+  u32 kernel_accel_new = device_processors;
+
+  if (kernel_accel_user)
+  {
+    kernel_accel_new = kernel_accel_user;
+  }
+  else
+  {
+    if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->device_host_unified_memory == false))
+    {
+      // divide in 64 bit and clamp before narrowing to u32; a zero block count yields 0 instead of dividing by zero
+      const u64 accel_fit = (size_per_accel) ? (available_mem / size_per_accel) : 0;
+      kernel_accel_new = (u32) MIN (accel_fit, 1024); // 1024 = max supported
+    }
+  }
+
+  char *new_device_name = hcstrdup (device_param->device_name);
+
+  for (size_t i = 0; i < strlen (new_device_name); i++)
+  {
+    if (new_device_name[i] == ' ') new_device_name[i] = '_';
+  }
+
+  lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", new_device_name, user_options->hash_mode, kernel_accel_new);
+  hcfree (new_device_name);
+
+  return lines_buf;
+}
+
+u64 argon2_module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
+{
+  // Returns the extra buffer size in bytes: kernel_accel_max * ARGON2_BLOCK_SIZE * memory_block_count.
+  argon2_options_t *options    = (argon2_options_t *) hashes->esalts_buf;
+  argon2_options_t *options_st = (argon2_options_t *) hashes->st_esalts_buf;
+  // block count of the first esalt, or of the self-test esalt when the first one is zero
+  const u32 memory_block_count = (options->memory_block_count) ? options->memory_block_count : options_st->memory_block_count;
+  // widen before multiplying: int * u32 is a 32-bit product and wraps at 2^22 blocks (4 GiB)
+  const u64 size_per_accel = (u64) ARGON2_BLOCK_SIZE * memory_block_count;
+  const u64 size_argon2    = device_param->kernel_accel_max * size_per_accel;
+
+  return size_argon2;
+}
+
+u64 argon2_module_extra_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes)
+{
+  argon2_options_t *options    = (argon2_options_t *) hashes->esalts_buf;
+  argon2_options_t *options_st = (argon2_options_t *) hashes->st_esalts_buf;
+  // Checks that all salts share one memory_block_count and parallelism, then returns sizeof (argon2_tmp_t).
+  const u32 memory_block_count = (options->memory_block_count) ? options->memory_block_count : options_st->memory_block_count;
+  const u32 parallelism        = (options->parallelism)        ? options->parallelism        : options_st->parallelism;
+  // a salt whose settings differ from the reference values is reported as bit 63 plus its index
+  for (u32 i = 1; i < hashes->salts_cnt; i++)
+  {
+    if ((memory_block_count != options[i].memory_block_count)
+     || (parallelism        != options[i].parallelism))
+    {
+      return (1ULL << 63) + i;
+    }
+  }
+
+  // all user hashes agree at this point; unless the self-test is disabled, its hash must use the same settings too
+  // a self-test mismatch is reported as bit 62 - NOTE(review): how the caller decodes bits 62/63 is not visible here
+  if ((hashconfig->opts_type & OPTS_TYPE_SELF_TEST_DISABLE) == 0)
+  {
+    if ((memory_block_count != options_st->memory_block_count)
+     || (parallelism        != options_st->parallelism))
+    {
+      return (1ULL << 62);
+    }
+  }
+  // settings are consistent: report the size of one argon2_tmp_t
+  u64 tmp_size = sizeof (argon2_tmp_t);
+
+  return tmp_size;
+}
+
+char *argon2_module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
+{
+  argon2_options_t *options = (argon2_options_t *) hashes->esalts_buf;
+
+  char *jit_build_options = NULL;
+
+  hc_asprintf (&jit_build_options, "-D ARGON2_PARALLELISM=%u -D ARGON2_TMP_ELEM=%u", options[0].parallelism, options[0].memory_block_count);
+
+  return jit_build_options;
+}
+
diff --git a/src/modules/module_34000.c b/src/modules/module_34000.c
index 60a37358c..34b621133 100644
--- a/src/modules/module_34000.c
+++ b/src/modules/module_34000.c
@@ -11,9 +11,6 @@
 #include "shared.h"
 #include "memory.h"
 
-#define ARGON2_SYNC_POINTS  4
-#define ARGON2_BLOCK_SIZE   1024
-
 static const u32   ATTACK_EXEC    = ATTACK_EXEC_OUTSIDE_KERNEL;
 static const u32   DGST_POS0      = 0;
 static const u32   DGST_POS1      = 1;
@@ -48,28 +45,7 @@ u32         module_salt_type      (MAYBE_UNUSED const hashconfig_t *hashconfig,
 const char *module_st_hash        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH;         }
 const char *module_st_pass        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS;         }
 
-typedef struct argon2_tmp
-{
-  u32 state[4]; // just something for now
-
-} argon2_tmp_t;
-
-typedef struct argon2_options
-{
-  u32 type;
-  u32 version;
-
-  u32 iterations;
-  u32 parallelism;
-  u32 memory_usage_in_kib;
-
-  u32 segment_length;
-  u32 lane_length;
-  u32 memory_block_count;
-
-  u32 digest_len;
-
-} argon2_options_t;
+#include "argon2_common.c"
 
 static const char *SIGNATURE_ARGON2D  = "$argon2d$";
 static const char *SIGNATURE_ARGON2I  = "$argon2i$";
@@ -82,140 +58,6 @@ u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED
   return esalt_size;
 }
 
-u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_min = 32; // hard-coded in kernel
-
-  return kernel_threads_min;
-}
-
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 32; // hard-coded in kernel
-
-  return kernel_threads_max;
-}
-
-u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u64 tmp_size = 0; // we'll add some later
-
-  return tmp_size;
-}
-
-const char *module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel_user)
-{
-  hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
-
-  argon2_options_t *options    = (argon2_options_t *) hashes->esalts_buf;
-  argon2_options_t *options_st = (argon2_options_t *) hashes->st_esalts_buf;
-
-  const u32 memory_block_count = (options->memory_block_count) ? options->memory_block_count : options_st->memory_block_count;
-
-  const u64 size_per_accel = ARGON2_BLOCK_SIZE * memory_block_count;
-
-  int   lines_sz  = 4096;
-  char *lines_buf = hcmalloc (lines_sz);
-  int   lines_pos = 0;
-
-  const u32 device_processors = device_param->device_processors;
-
-  const u32 device_maxworkgroup_size = device_param->device_maxworkgroup_size;
-
-  const u64 fixed_mem = (256 * 1024 * 1024); // some storage we need for pws[], tmps[], and others. Is around 72MiB in reality.
-
-  const u64 spill_mem = 2048 * device_processors * device_maxworkgroup_size; // 1600 according to ptxas
-
-  const u64 available_mem = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)) - (fixed_mem + spill_mem);
-
-  u32 kernel_accel_new = device_processors;
-
-  if (kernel_accel_user)
-  {
-    kernel_accel_new = kernel_accel_user;
-  }
-  else
-  {
-    if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->device_host_unified_memory == false))
-    {
-      kernel_accel_new = available_mem / size_per_accel;
-
-      kernel_accel_new = MIN (kernel_accel_new, 1024); // 1024 = max supported
-    }
-  }
-
-  char *new_device_name = hcstrdup (device_param->device_name);
-
-  for (size_t i = 0; i < strlen (new_device_name); i++)
-  {
-    if (new_device_name[i] == ' ') new_device_name[i] = '_';
-  }
-
-  lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", new_device_name, user_options->hash_mode, kernel_accel_new);
-
-  hcfree (new_device_name);
-
-  return lines_buf;
-}
-
-u64 module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
-{
-  argon2_options_t *options    = (argon2_options_t *) hashes->esalts_buf;
-  argon2_options_t *options_st = (argon2_options_t *) hashes->st_esalts_buf;
-
-  const u32 memory_block_count = (options->memory_block_count) ? options->memory_block_count : options_st->memory_block_count;
-
-  const u64 size_per_accel = ARGON2_BLOCK_SIZE * memory_block_count;
-
-  const u64 size_argon2 = device_param->kernel_accel_max * size_per_accel;
-
-  return size_argon2;
-}
-
-u64 module_extra_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes)
-{
-  argon2_options_t *options    = (argon2_options_t *) hashes->esalts_buf;
-  argon2_options_t *options_st = (argon2_options_t *) hashes->st_esalts_buf;
-
-  const u32 memory_block_count = (options->memory_block_count) ? options->memory_block_count : options_st->memory_block_count;
-  const u32 parallelism        = (options->parallelism)        ? options->parallelism        : options_st->parallelism;
-
-  for (u32 i = 1; i < hashes->salts_cnt; i++)
-  {
-    if ((memory_block_count != options[i].memory_block_count)
-     || (parallelism        != options[i].parallelism))
-    {
-      return (1ULL << 63) + i;
-    }
-  }
-
-  // now that we know they all have the same settings, we also need to check the self-test hash is different to what the user hash is using
-
-  if ((hashconfig->opts_type & OPTS_TYPE_SELF_TEST_DISABLE) == 0)
-  {
-    if ((memory_block_count != options_st->memory_block_count)
-     || (parallelism        != options_st->parallelism))
-    {
-      return (1ULL << 62);
-    }
-  }
-
-  u64 tmp_size = sizeof (argon2_tmp_t);
-
-  return tmp_size;
-}
-
-char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
-{
-  argon2_options_t *options = (argon2_options_t *) hashes->esalts_buf;
-
-  char *jit_build_options = NULL;
-
-  hc_asprintf (&jit_build_options, "-D ARGON2_PARALLELISM=%u -D ARGON2_TMP_ELEM=%u", options[0].parallelism, options[0].memory_block_count);
-
-  return jit_build_options;
-}
-
 int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
 {
   u32 *digest = (u32 *) digest_buf;
@@ -393,9 +235,9 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_dgst_size                = module_dgst_size;
   module_ctx->module_dictstat_disable         = MODULE_DEFAULT;
   module_ctx->module_esalt_size               = module_esalt_size;
-  module_ctx->module_extra_buffer_size        = module_extra_buffer_size;
-  module_ctx->module_extra_tmp_size           = module_extra_tmp_size;
-  module_ctx->module_extra_tuningdb_block     = module_extra_tuningdb_block;
+  module_ctx->module_extra_buffer_size        = argon2_module_extra_buffer_size;
+  module_ctx->module_extra_tmp_size           = argon2_module_extra_tmp_size;
+  module_ctx->module_extra_tuningdb_block     = argon2_module_extra_tuningdb_block;
   module_ctx->module_forced_outfile_format    = MODULE_DEFAULT;
   module_ctx->module_hash_binary_count        = MODULE_DEFAULT;
   module_ctx->module_hash_binary_parse        = MODULE_DEFAULT;
@@ -421,14 +263,14 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_hook23                   = MODULE_DEFAULT;
   module_ctx->module_hook_salt_size           = MODULE_DEFAULT;
   module_ctx->module_hook_size                = MODULE_DEFAULT;
-  module_ctx->module_jit_build_options        = module_jit_build_options;
+  module_ctx->module_jit_build_options        = argon2_module_jit_build_options;
   module_ctx->module_jit_cache_disable        = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
-  module_ctx->module_kernel_threads_min       = module_kernel_threads_min;
+  module_ctx->module_kernel_threads_max       = argon2_module_kernel_threads_max;
+  module_ctx->module_kernel_threads_min       = argon2_module_kernel_threads_min;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
   module_ctx->module_opti_type                = module_opti_type;
@@ -447,7 +289,8 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_separator                = MODULE_DEFAULT;
   module_ctx->module_st_hash                  = module_st_hash;
   module_ctx->module_st_pass                  = module_st_pass;
-  module_ctx->module_tmp_size                 = module_tmp_size;
+  module_ctx->module_tmp_size                 = argon2_module_tmp_size;
   module_ctx->module_unstable_warning         = MODULE_DEFAULT;
   module_ctx->module_warmup_disable           = MODULE_DEFAULT;
 }
+

From 4d39f881fd2f501b2b0251fc5259bae123be7e99 Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Thu, 3 Jul 2025 10:26:51 +0200
Subject: [PATCH 62/83] support 2D/3D kernel invocation with Metal

---
 include/ext_metal.h |  2 +-
 src/backend.c       | 23 +++++++++--------------
 src/ext_metal.m     |  6 +++---
 3 files changed, 13 insertions(+), 18 deletions(-)

diff --git a/include/ext_metal.h b/include/ext_metal.h
index a7a7d37a9..85facc62b 100644
--- a/include/ext_metal.h
+++ b/include/ext_metal.h
@@ -111,7 +111,7 @@ int  hc_mtlCreateLibraryWithFile    (void *hashcat_ctx, mtl_device_id metal_devi
 int  hc_mtlEncodeComputeCommand_pre (void *hashcat_ctx, mtl_pipeline metal_pipeline, mtl_command_queue metal_command_queue, mtl_command_buffer *metal_command_buffer, mtl_command_encoder *metal_command_encoder);
 int  hc_mtlSetCommandEncoderArg     (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, size_t off, size_t idx, mtl_mem buf, void *host_data, size_t host_data_size);
 
-int  hc_mtlEncodeComputeCommand     (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, mtl_command_buffer metal_command_buffer, size_t global_work_size, size_t local_work_size, double *ms);
+int  hc_mtlEncodeComputeCommand     (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, mtl_command_buffer metal_command_buffer, const size_t global_work_size[3], const size_t local_work_size[3], double *ms);
 
 #endif // __APPLE__
 
diff --git a/src/backend.c b/src/backend.c
index 74b6b96e9..4fa01abb9 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -2206,7 +2206,7 @@ int run_metal_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *devi
 
   double ms = 0;
 
-  if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size[0], local_work_size[0], &ms) == -1) return -1;
+  if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms) == -1) return -1;
 
   return 0;
 }
@@ -2234,7 +2234,7 @@ int run_metal_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_
 
   double ms = 0;
 
-  if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size[0], local_work_size[0], &ms) == -1) return -1;
+  if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms) == -1) return -1;
 
   return 0;
 }
@@ -2265,7 +2265,7 @@ int run_metal_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *devic
 
     double ms = 0;
 
-    if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size[0], local_work_size[0], &ms) == -1) return -1;
+    if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms) == -1) return -1;
   }
 
   if (num16m)
@@ -2913,31 +2913,26 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
     size_t global_work_size[3] = { num_elements,   1, 1 };
     size_t local_work_size[3]  = { kernel_threads, 1, 1 };
 
-    cl_uint work_dim = 1;
-
     if ((hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_DIMY_INIT) && (kern_run == KERN_RUN_1))
     {
       global_work_size[1] = local_work_size[1] = hashcat_ctx->hashes->salts_buf->salt_dimy;
-      work_dim = 2;
     }
 
     if ((hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_DIMY_LOOP) && (kern_run == KERN_RUN_2))
     {
       global_work_size[1] = local_work_size[1] = hashcat_ctx->hashes->salts_buf->salt_dimy;
-      work_dim = 2;
     }
 
     if ((hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_DIMY_COMP) && (kern_run == KERN_RUN_3))
     {
       global_work_size[1] = local_work_size[1] = hashcat_ctx->hashes->salts_buf->salt_dimy;
-      work_dim = 2;
     }
 
     double ms = 0;
 
     if (is_autotune == true)
     {
-      hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size[0], local_work_size[0], &ms);
+      hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms);
 
       // hc_mtlEncodeComputeCommand_pre() must be called before every hc_mtlEncodeComputeCommand()
       if (hc_mtlEncodeComputeCommand_pre (hashcat_ctx, metal_pipeline, device_param->metal_command_queue, &metal_command_buffer, &metal_command_encoder) == -1) return -1;
@@ -2956,7 +2951,7 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
       }
     }
 
-    const int rc_cc = hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size[0], local_work_size[0], &ms);
+    const int rc_cc = hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms);
 
     if (rc_cc != -1)
     {
@@ -3349,7 +3344,7 @@ int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
     double ms = 0;
 
-    if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size[0], local_work_size[0], &ms) == -1) return -1;
+    if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms) == -1) return -1;
   }
   #endif // __APPLE__
 
@@ -3440,7 +3435,7 @@ int run_kernel_tm (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
 
     double ms = 0;
 
-    if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size[0], local_work_size[0], &ms) == -1) return -1;
+    if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms) == -1) return -1;
   }
   #endif // __APPLE__
 
@@ -3524,7 +3519,7 @@ int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
     double ms = 0;
 
-    const int rc_cc = hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size[0], local_work_size[0], &ms);
+    const int rc_cc = hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms);
 
     // release tmp_buf
 
@@ -3604,7 +3599,7 @@ int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device
 
     double ms = 0;
 
-    if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size[0], local_work_size[0], &ms) == -1) return -1;
+    if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms) == -1) return -1;
   }
   #endif // __APPLE__
 
diff --git a/src/ext_metal.m b/src/ext_metal.m
index 6a51d48bd..cd23fb062 100644
--- a/src/ext_metal.m
+++ b/src/ext_metal.m
@@ -1314,10 +1314,10 @@ int hc_mtlSetCommandEncoderArg (void *hashcat_ctx, mtl_command_encoder metal_com
   return 0;
 }
 
-int hc_mtlEncodeComputeCommand (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, mtl_command_buffer metal_command_buffer, size_t global_work_size, size_t local_work_size, double *ms)
+int hc_mtlEncodeComputeCommand (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, mtl_command_buffer metal_command_buffer, const size_t global_work_size[3], const size_t local_work_size[3], double *ms)
 {
-  MTLSize numThreadgroups = {local_work_size, 1, 1};
-  MTLSize threadsGroup = {global_work_size, 1, 1};
+  MTLSize numThreadgroups = {local_work_size[0], local_work_size[1], local_work_size[2]};
+  MTLSize threadsGroup = {global_work_size[0], global_work_size[1], global_work_size[2]};
 
   if (metal_command_encoder == nil)
   {

From bcc351068ffc0949a88a9c6c38b09b60f0c0897c Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Thu, 3 Jul 2025 22:06:32 +0200
Subject: [PATCH 63/83] Metal Backend: - added support to 2D/3D Compute -
 improved compute workloads calculation Makefile: - updated
 MACOSX_DEPLOYMENT_TARGET to 15.0 Unit tests: - updated install_modules.sh
 with Crypt::Argon2

Argon2 starts to work with Apple Metal
---
 OpenCL/inc_amp.h         |  2 +-
 OpenCL/inc_common.h      |  8 +++---
 OpenCL/inc_markov.h      |  6 ++---
 OpenCL/inc_platform.h    | 23 ++++++++++++++---
 OpenCL/inc_shared.h      | 10 ++++----
 docs/changes.txt         |  4 +++
 include/ext_metal.h      |  2 +-
 src/Makefile             |  2 +-
 src/backend.c            | 23 ++++++++++-------
 src/ext_metal.m          | 53 ++++++++++++++++++++++++++++++----------
 tools/install_modules.sh |  1 +
 11 files changed, 93 insertions(+), 41 deletions(-)

diff --git a/OpenCL/inc_amp.h b/OpenCL/inc_amp.h
index 5db6a21d8..e49b23959 100644
--- a/OpenCL/inc_amp.h
+++ b/OpenCL/inc_amp.h
@@ -16,7 +16,7 @@
   GLOBAL_AS   const bf_t          *bfs_buf,    \
   CONSTANT_AS const u32           &combs_mode, \
   CONSTANT_AS const u64           &gid_max,    \
-                    uint           hc_gid [[ thread_position_in_grid ]]
+                    uint3          hc_gid [[ thread_position_in_grid ]]
 
 #else // CUDA, HIP, OpenCL
 
diff --git a/OpenCL/inc_common.h b/OpenCL/inc_common.h
index c24ecb524..d0b2ed989 100644
--- a/OpenCL/inc_common.h
+++ b/OpenCL/inc_common.h
@@ -124,10 +124,10 @@
 
 #if defined IS_METAL
 #define KERN_ATTR_MAIN_PARAMS                       \
-  uint hc_gid [[ thread_position_in_grid ]],        \
-  uint hc_lid [[ thread_position_in_threadgroup ]], \
-  uint hc_lsz [[ threads_per_threadgroup ]],        \
-  uint hc_bid [[ threadgroup_position_in_grid ]]
+  uint3 hc_gid [[ thread_position_in_grid ]],        \
+  uint3 hc_lid [[ thread_position_in_threadgroup ]], \
+  uint3 hc_lsz [[ threads_per_threadgroup ]],        \
+  uint3 hc_bid [[ threadgroup_position_in_grid ]]
 #endif // IS_METAL
 
 /*
diff --git a/OpenCL/inc_markov.h b/OpenCL/inc_markov.h
index 3aae8f7fc..1ec187b8e 100644
--- a/OpenCL/inc_markov.h
+++ b/OpenCL/inc_markov.h
@@ -19,7 +19,7 @@
   CONSTANT_AS const u32  &bits14,         \
   CONSTANT_AS const u32  &bits15,         \
   CONSTANT_AS const u64  &gid_max,        \
-                    uint  hc_gid [[ thread_position_in_grid ]]
+                    uint3 hc_gid [[ thread_position_in_grid ]]
 
 #define KERN_ATTR_R_MARKOV                \
   GLOBAL_AS         bf_t *pws_buf_r,      \
@@ -31,7 +31,7 @@
   CONSTANT_AS const u32  &bits14,         \
   CONSTANT_AS const u32  &bits15,         \
   CONSTANT_AS const u64  &gid_max,        \
-                    uint  hc_gid [[ thread_position_in_grid ]]
+                    uint3 hc_gid [[ thread_position_in_grid ]]
 
 #define KERN_ATTR_C_MARKOV                \
   GLOBAL_AS         pw_t *pws_buf,        \
@@ -43,7 +43,7 @@
   CONSTANT_AS const u32  &bits14,         \
   CONSTANT_AS const u32  &bits15,         \
   CONSTANT_AS const u64  &gid_max,        \
-                    uint  hc_gid [[ thread_position_in_grid ]]
+                    uint3 hc_gid [[ thread_position_in_grid ]]
 
 #else // CUDA, HIP, OpenCL
 
diff --git a/OpenCL/inc_platform.h b/OpenCL/inc_platform.h
index e1ffdefcf..9729d4fad 100644
--- a/OpenCL/inc_platform.h
+++ b/OpenCL/inc_platform.h
@@ -73,10 +73,25 @@ DECLSPEC u32 hc_atomic_dec (volatile GLOBAL_AS u32 *p);
 DECLSPEC u32 hc_atomic_inc (volatile GLOBAL_AS u32 *p);
 DECLSPEC u32 hc_atomic_or  (volatile GLOBAL_AS u32 *p, volatile const u32 val);
 
-#define get_global_id(param) hc_gid
-#define get_local_id(param) hc_lid
-#define get_local_size(param) hc_lsz
-#define get_group_id(param) hc_bid
+#define get_global_id(dimindx)   \
+  ((dimindx) == 0 ? hc_gid.x :   \
+   (dimindx) == 1 ? hc_gid.y :   \
+   (dimindx) == 2 ? hc_gid.z : -1) // picks the x/y/z component of hc_gid; dimindx may be evaluated up to 3 times, other indexes take the -1 arm
+
+#define get_group_id(dimindx)    \
+  ((dimindx) == 0 ? hc_bid.x :   \
+   (dimindx) == 1 ? hc_bid.y :   \
+   (dimindx) == 2 ? hc_bid.z : -1) // picks the x/y/z component of hc_bid; dimindx may be evaluated up to 3 times, other indexes take the -1 arm
+
+#define get_local_id(dimindx)    \
+  ((dimindx) == 0 ? hc_lid.x :   \
+   (dimindx) == 1 ? hc_lid.y :   \
+   (dimindx) == 2 ? hc_lid.z : -1) // picks the x/y/z component of hc_lid; dimindx may be evaluated up to 3 times, other indexes take the -1 arm
+
+#define get_local_size(dimindx)  \
+  ((dimindx) == 0 ? hc_lsz.x :   \
+   (dimindx) == 1 ? hc_lsz.y :   \
+   (dimindx) == 2 ? hc_lsz.z : -1) // picks the x/y/z component of hc_lsz; dimindx may be evaluated up to 3 times, other indexes take the -1 arm
 
 DECLSPEC u32x rotl32   (const u32x a, const int n);
 DECLSPEC u32x rotr32   (const u32x a, const int n);
diff --git a/OpenCL/inc_shared.h b/OpenCL/inc_shared.h
index 16f2e2c4c..6518c30c6 100644
--- a/OpenCL/inc_shared.h
+++ b/OpenCL/inc_shared.h
@@ -13,28 +13,28 @@
   GLOBAL_AS         u32      *pws_comp, \
   GLOBAL_AS         pw_t     *pws_buf,  \
   CONSTANT_AS const u64      &gid_max,  \
-                    uint      hc_gid [[ thread_position_in_grid ]]
+                    uint3     hc_gid [[ thread_position_in_grid ]]
 
 #define KERN_ATTR_GPU_MEMSET            \
   GLOBAL_AS         uint4    *buf,      \
   CONSTANT_AS const u32      &value,    \
   CONSTANT_AS const u64      &gid_max,  \
-                    uint      hc_gid [[ thread_position_in_grid ]]
+                    uint3     hc_gid [[ thread_position_in_grid ]]
 
 #define KERN_ATTR_GPU_BZERO             \
   GLOBAL_AS         uint4    *buf,      \
   CONSTANT_AS const u64      &gid_max,  \
-                    uint      hc_gid [[ thread_position_in_grid ]]
+                    uint3     hc_gid [[ thread_position_in_grid ]]
 
 #define KERN_ATTR_GPU_ATINIT            \
   GLOBAL_AS         pw_t     *buf,      \
   CONSTANT_AS const u64      &gid_max,  \
-                    uint      hc_gid [[ thread_position_in_grid ]]
+                    uint3     hc_gid [[ thread_position_in_grid ]]
 
 #define KERN_ATTR_GPU_UTF8_TO_UTF16     \
   GLOBAL_AS         pw_t     *pws_buf,  \
   CONSTANT_AS const u64      &gid_max,  \
-                    uint      hc_gid [[ thread_position_in_grid ]]
+                    uint3     hc_gid [[ thread_position_in_grid ]]
 
 #else // CUDA, HIP, OpenCL
 
diff --git a/docs/changes.txt b/docs/changes.txt
index 06e1e46bb..cfdba32b4 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -148,19 +148,23 @@
 - Status Code: Add specific return code for self-test fail (-11)
 - Scrypt: Increase buffer sizes in module for hash mode 8900 to allow longer scrypt digests
 - Unicode: Update UTF-8 to UTF-16 conversion to match RFC 3629
+- Unit tests: Updated install_modules.sh with Crypt::Argon2
 - User Options: Added error message when mixing --username and --show to warn users of exponential delay
 - MetaMask: update extraction tool to support MetaMask Mobile wallets
 - SecureCRT MasterPassphrase v2: update module, pure kernels and test unit. Add optimized kernels.
 - Metal Backend: added workaround to prevent 'Infinite Loop' bug when build kernels
 - Metal Backend: added workaround to set the true Processor value in Metal devices on Apple Intel
+- Metal Backend: added support to 2D/3D Compute
 - Metal Backend: allow use of devices with Metal if runtime version is >= 200
 - Metal Backend: disable Metal devices only if at least one OpenCL device is active
+- Metal Backend: improved compute workloads calculation
 - Modules: Check UnpackSize to raise false positive with hc_decompress_rar
 - User Options: added --metal-compiler-runtime option
 - Hardware Monitor: avoid sprintf in src/ext_iokit.c
 - Hardware Monitor: Splitting hwmon_ctx_init function into smaller library-specific functions
 - Help: show supported hash-modes only with -hh
 - Makefile: prevent make failure with Apple Silicon in case of partial rebuild
+- Makefile: updated MACOSX_DEPLOYMENT_TARGET to 15.0
 - Rules: Rename best64.rule to best66.rule and remove the unknown section from it
 
 * changes v6.2.5 -> v6.2.6
diff --git a/include/ext_metal.h b/include/ext_metal.h
index 85facc62b..b51f09ff2 100644
--- a/include/ext_metal.h
+++ b/include/ext_metal.h
@@ -111,7 +111,7 @@ int  hc_mtlCreateLibraryWithFile    (void *hashcat_ctx, mtl_device_id metal_devi
 int  hc_mtlEncodeComputeCommand_pre (void *hashcat_ctx, mtl_pipeline metal_pipeline, mtl_command_queue metal_command_queue, mtl_command_buffer *metal_command_buffer, mtl_command_encoder *metal_command_encoder);
 int  hc_mtlSetCommandEncoderArg     (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, size_t off, size_t idx, mtl_mem buf, void *host_data, size_t host_data_size);
 
-int  hc_mtlEncodeComputeCommand     (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, mtl_command_buffer metal_command_buffer, const size_t global_work_size[3], const size_t local_work_size[3], double *ms);
+int  hc_mtlEncodeComputeCommand     (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, mtl_command_buffer metal_command_buffer, const unsigned int work_dim, const size_t global_work_size[3], const size_t local_work_size[3], double *ms);
 
 #endif // __APPLE__
 
diff --git a/src/Makefile b/src/Makefile
index 2ed479330..2cfbf9406 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -358,7 +358,7 @@ LFLAGS_NATIVE           += -lpthread
 endif # NetBSD
 
 ifeq ($(UNAME),Darwin)
-export MACOSX_DEPLOYMENT_TARGET=10.15
+export MACOSX_DEPLOYMENT_TARGET=15.0
 CFLAGS_NATIVE           := $(CFLAGS)
 CFLAGS_NATIVE           += -DWITH_HWMON
 
diff --git a/src/backend.c b/src/backend.c
index 4fa01abb9..3109f8918 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -2206,7 +2206,7 @@ int run_metal_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *devi
 
   double ms = 0;
 
-  if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms) == -1) return -1;
+  if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, 1, global_work_size, local_work_size, &ms) == -1) return -1;
 
   return 0;
 }
@@ -2234,7 +2234,7 @@ int run_metal_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_
 
   double ms = 0;
 
-  if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms) == -1) return -1;
+  if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, 1, global_work_size, local_work_size, &ms) == -1) return -1;
 
   return 0;
 }
@@ -2265,7 +2265,7 @@ int run_metal_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *devic
 
     double ms = 0;
 
-    if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms) == -1) return -1;
+    if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, 1, global_work_size, local_work_size, &ms) == -1) return -1;
   }
 
   if (num16m)
@@ -2910,29 +2910,34 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
       num_elements = num_elements * kernel_threads;
     }
 
+    unsigned int work_dim = 1;
+
     size_t global_work_size[3] = { num_elements,   1, 1 };
     size_t local_work_size[3]  = { kernel_threads, 1, 1 };
 
     if ((hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_DIMY_INIT) && (kern_run == KERN_RUN_1))
     {
       global_work_size[1] = local_work_size[1] = hashcat_ctx->hashes->salts_buf->salt_dimy;
+      work_dim = 2;
     }
 
     if ((hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_DIMY_LOOP) && (kern_run == KERN_RUN_2))
     {
       global_work_size[1] = local_work_size[1] = hashcat_ctx->hashes->salts_buf->salt_dimy;
+      work_dim = 2;
     }
 
     if ((hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_DIMY_COMP) && (kern_run == KERN_RUN_3))
     {
       global_work_size[1] = local_work_size[1] = hashcat_ctx->hashes->salts_buf->salt_dimy;
+      work_dim = 2;
     }
 
     double ms = 0;
 
     if (is_autotune == true)
     {
-      hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms);
+      hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, work_dim, global_work_size, local_work_size, &ms);
 
       // hc_mtlEncodeComputeCommand_pre() must be called before every hc_mtlEncodeComputeCommand()
       if (hc_mtlEncodeComputeCommand_pre (hashcat_ctx, metal_pipeline, device_param->metal_command_queue, &metal_command_buffer, &metal_command_encoder) == -1) return -1;
@@ -2951,7 +2956,7 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
       }
     }
 
-    const int rc_cc = hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms);
+    const int rc_cc = hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, work_dim, global_work_size, local_work_size, &ms);
 
     if (rc_cc != -1)
     {
@@ -3344,7 +3349,7 @@ int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
     double ms = 0;
 
-    if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms) == -1) return -1;
+    if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, 1, global_work_size, local_work_size, &ms) == -1) return -1;
   }
   #endif // __APPLE__
 
@@ -3435,7 +3440,7 @@ int run_kernel_tm (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
 
     double ms = 0;
 
-    if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms) == -1) return -1;
+    if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, 1, global_work_size, local_work_size, &ms) == -1) return -1;
   }
   #endif // __APPLE__
 
@@ -3519,7 +3524,7 @@ int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
     double ms = 0;
 
-    const int rc_cc = hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms);
+    const int rc_cc = hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, 1, global_work_size, local_work_size, &ms);
 
     // release tmp_buf
 
@@ -3599,7 +3604,7 @@ int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device
 
     double ms = 0;
 
-    if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms) == -1) return -1;
+    if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, 1, global_work_size, local_work_size, &ms) == -1) return -1;
   }
   #endif // __APPLE__
 
diff --git a/src/ext_metal.m b/src/ext_metal.m
index cd23fb062..a05c1a0e7 100644
--- a/src/ext_metal.m
+++ b/src/ext_metal.m
@@ -195,11 +195,14 @@ static int hc_mtlBuildOptionsToDict (void *hashcat_ctx, const char *build_option
   }
 
   // if set, add INCLUDE_PATH to hack Apple kernel build from source limitation on -I usage
+
   if (include_path != nil)
   {
     NSString *path_key = @"INCLUDE_PATH";
     NSString *path_value = [NSString stringWithCString: include_path encoding: NSUTF8StringEncoding];
+
     // Include path may contain spaces, escape them with a backslash
+
     path_value = [path_value stringByReplacingOccurrencesOfString:@" " withString:@"\\ "];
 
     [build_options_dict setObject:path_value forKey:path_key];
@@ -743,6 +746,7 @@ int hc_mtlCreateKernel (void *hashcat_ctx, mtl_device_id metal_device, mtl_libra
   dispatch_queue_t queue = dispatch_get_global_queue (DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
 
   // if no user-defined runtime, set to METAL_COMPILER_RUNTIME
+
   long timeout = (user_options->metal_compiler_runtime > 0) ? user_options->metal_compiler_runtime : METAL_COMPILER_RUNTIME;
 
   dispatch_time_t when = dispatch_time (DISPATCH_TIME_NOW,NSEC_PER_SEC * timeout);
@@ -1314,10 +1318,21 @@ int hc_mtlSetCommandEncoderArg (void *hashcat_ctx, mtl_command_encoder metal_com
   return 0;
 }
 
-int hc_mtlEncodeComputeCommand (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, mtl_command_buffer metal_command_buffer, const size_t global_work_size[3], const size_t local_work_size[3], double *ms)
+int hc_mtlEncodeComputeCommand (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, mtl_command_buffer metal_command_buffer, const unsigned int work_dim, const size_t global_work_size[3], const size_t local_work_size[3], double *ms)
 {
-  MTLSize numThreadgroups = {local_work_size[0], local_work_size[1], local_work_size[2]};
-  MTLSize threadsGroup = {global_work_size[0], global_work_size[1], global_work_size[2]};
+  MTLSize threadsPerThreadgroup =
+  {
+    local_work_size[0],
+    local_work_size[1],
+    local_work_size[2]
+  };
+
+  MTLSize threadgroupsPerGrid =
+  {
+    (global_work_size[0] + threadsPerThreadgroup.width - 1) / threadsPerThreadgroup.width,
+    work_dim > 1 ? (global_work_size[1] + threadsPerThreadgroup.height - 1) / threadsPerThreadgroup.height : 1,
+    work_dim > 2 ? (global_work_size[2] + threadsPerThreadgroup.depth - 1) / threadsPerThreadgroup.depth : 1
+  };
 
   if (metal_command_encoder == nil)
   {
@@ -1333,7 +1348,7 @@ int hc_mtlEncodeComputeCommand (void *hashcat_ctx, mtl_command_encoder metal_com
     return -1;
   }
 
-  [metal_command_encoder dispatchThreadgroups: threadsGroup threadsPerThreadgroup: numThreadgroups];
+  [metal_command_encoder dispatchThreadgroups: threadgroupsPerGrid threadsPerThreadgroup: threadsPerThreadgroup];
 
   [metal_command_encoder endEncoding];
   [metal_command_buffer commit];
@@ -1377,17 +1392,22 @@ int hc_mtlCreateLibraryWithFile (void *hashcat_ctx, mtl_device_id metal_device,
 
   if (k_string != nil)
   {
-    id <MTLLibrary> r = [metal_device newLibraryWithFile: k_string error: &error];
+    NSURL *libURL = [NSURL fileURLWithPath: k_string];
 
-    if (error != nil)
+    if (libURL != nil)
     {
-      event_log_error (hashcat_ctx, "%s(): failed to create metal library from metallib, %s", __func__, [[error localizedDescription] UTF8String]);
-      return -1;
+      id <MTLLibrary> r = [metal_device newLibraryWithURL: libURL error:&error];
+
+      if (error != nil)
+      {
+        event_log_error (hashcat_ctx, "%s(): failed to create metal library from metallib, %s", __func__, [[error localizedDescription] UTF8String]);
+        return -1;
+      }
+
+      *metal_library = r;
+
+      return 0;
     }
-
-    *metal_library = r;
-
-    return 0;
   }
 
   return -1;
@@ -1420,10 +1440,17 @@ int hc_mtlCreateLibraryWithSource (void *hashcat_ctx, mtl_device_id metal_device
       }
 
       compileOptions.preprocessorMacros = build_options_dict;
+      /*
+      compileOptions.optimizationLevel = MTLLibraryOptimizationLevelSize;
+      compileOptions.mathMode = MTLMathModeSafe;
+      // compileOptions.mathMode = MTLMathModeRelaxed;
+      // compileOptions.enableLogging = true;
+      // compileOptions.fastMathEnabled = false;
+      */
     }
 
     // todo: detect current os version and choose the right
-//    compileOptions.languageVersion = MTL_LANGUAGEVERSION_2_3;
+    // compileOptions.languageVersion = MTL_LANGUAGEVERSION_2_3;
 /*
     if (@available(macOS 12.0, *))
     {
diff --git a/tools/install_modules.sh b/tools/install_modules.sh
index 683d20000..948856e34 100755
--- a/tools/install_modules.sh
+++ b/tools/install_modules.sh
@@ -18,6 +18,7 @@ cpan install Authen::Passphrase::LANManager \
              Bitcoin::Crypto::Base58        \
              Compress::Zlib                 \
              Convert::EBCDIC                \
+             Crypt::Argon2                  \
              Crypt::AuthEnc::GCM            \
              Crypt::Camellia                \
              Crypt::CBC                     \

From 80803e2ea5ccdf7b73350ac6a5c62816db864386 Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Fri, 4 Jul 2025 08:04:44 +0200
Subject: [PATCH 64/83] fix -a9 by adding missing get_global_id() in m34000_loop

---
 OpenCL/m34000-pure.cl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/OpenCL/m34000-pure.cl b/OpenCL/m34000-pure.cl
index 6fba3590c..ba87d835c 100644
--- a/OpenCL/m34000-pure.cl
+++ b/OpenCL/m34000-pure.cl
@@ -58,6 +58,7 @@ KERNEL_FQ KERNEL_FA void m34000_init (KERN_ATTR_TMPS_ESALT (argon2_tmp_t, argon2
 
 KERNEL_FQ KERNEL_FA void m34000_loop (KERN_ATTR_TMPS_ESALT (argon2_tmp_t, argon2_options_t))
 {
+  const u64 gid = get_global_id (0);
   const u64 bid = get_group_id (0);
   const u64 lid = get_local_id (1);
   const u64 lsz = get_local_size (1);

From 25b9e67470b8963ea76193467dfb306858e13be3 Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Fri, 4 Jul 2025 08:09:56 +0200
Subject: [PATCH 65/83] make error messages on hashes_init_stage5() generic

---
 src/hashes.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/hashes.c b/src/hashes.c
index 72ab14433..40d94941f 100644
--- a/src/hashes.c
+++ b/src/hashes.c
@@ -2386,13 +2386,13 @@ int hashes_init_stage5 (hashcat_ctx_t *hashcat_ctx)
 
         char *st_hash = strdup (tmp_buf);
 
-        event_log_error (hashcat_ctx, "ERROR: Incompatible self-test SCRYPT configuration detected.");
+        event_log_error (hashcat_ctx, "ERROR: Incompatible self-test configuration detected.");
 
         event_log_warning (hashcat_ctx, "The specified target hash:");
         event_log_warning (hashcat_ctx, "  -> %s", user_hash);
-        event_log_warning (hashcat_ctx, "does not match the SCRYPT configuration of the self-test hash:");
+        event_log_warning (hashcat_ctx, "does not match the configuration of the self-test hash:");
         event_log_warning (hashcat_ctx, "  -> %s", st_hash);
-        event_log_warning (hashcat_ctx, "The JIT-compiled kernel for this SCRYPT configuration may be incompatible.");
+        event_log_warning (hashcat_ctx, "The JIT-compiled kernel for this configuration may be incompatible.");
         event_log_warning (hashcat_ctx, "You must disable the self-test functionality or recompile the plugin with a matching self-test hash.");
         event_log_warning (hashcat_ctx, "To disable the self-test, use the --self-test-disable option.");
         event_log_warning (hashcat_ctx, NULL);
@@ -2414,11 +2414,11 @@ int hashes_init_stage5 (hashcat_ctx_t *hashcat_ctx)
 
         char *user_hash2 = strdup (tmp_buf);
 
-        event_log_error (hashcat_ctx, "ERROR: Mixed SCRYPT configuration detected.");
+        event_log_error (hashcat_ctx, "ERROR: Mixed configuration detected.");
 
         event_log_warning (hashcat_ctx, "The specified target hash:");
         event_log_warning (hashcat_ctx, "  -> %s", user_hash);
-        event_log_warning (hashcat_ctx, "does not match the SCRYPT configuration of another target hash:");
+        event_log_warning (hashcat_ctx, "does not match the configuration of another target hash:");
         event_log_warning (hashcat_ctx, "  -> %s", user_hash2);
         event_log_warning (hashcat_ctx, "Please run these hashes in separate cracking sessions.");
         event_log_warning (hashcat_ctx, NULL);

From d3983edaf22f5370d1bb136c930460bcad2d9f6e Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Fri, 4 Jul 2025 21:51:32 +0200
Subject: [PATCH 66/83] Improved handling in get_opencl_kernel_wgs()

There are cases where we fix the thread count in a kernel using
FIXED_LOCAL_SIZE, but when the runtime loads the kernel binary, it
reports that it can only execute it with a different thread count.
According to the OpenCL specification, this can happen due to register
pressure.

However, we fix the thread count for a specific reason, and we choose to
accept potential register spilling to global memory. A warning is now
issued to inform the user about the runtime's suggested thread count,
allowing them to override it via the command line if they encounter
issues.

Also fixed the thread count for -m 10700 on NVIDIA's OpenCL, where 4
bytes are always lost for an unknown reason (similar to the issue seen
in bcrypt).
---
 src/backend.c              |  9 ++++++-
 src/modules/module_10700.c | 53 ++++++++++++++++++++++----------------
 2 files changed, 39 insertions(+), 23 deletions(-)

diff --git a/src/backend.c b/src/backend.c
index 3109f8918..e324a2e01 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -9336,7 +9336,14 @@ static int get_opencl_kernel_wgs (hashcat_ctx_t *hashcat_ctx, hc_device_param_t
 
   if (cwgs_total > 0)
   {
-    kernel_threads = MIN (kernel_threads, (u32) cwgs_total);
+    if (kernel_threads < cwgs_total)
+    {
+      // Very likely some bug, because the runtime was unable to follow our requirement to run N threads guaranteed on this kernel
+
+      event_log_warning (hashcat_ctx, "* Device #%u: Runtime returned CL_KERNEL_WORK_GROUP_SIZE=%d, but CL_KERNEL_COMPILE_WORK_GROUP_SIZE=%d. Use -T%d if you run into problems.", device_param->device_id + 1, (int) kernel_threads, (int) cwgs_total, (int) kernel_threads);
+    }
+
+    kernel_threads = cwgs_total;
   }
 
   *result = kernel_threads;
diff --git a/src/modules/module_10700.c b/src/modules/module_10700.c
index 4a7725e52..b0bdd4a41 100644
--- a/src/modules/module_10700.c
+++ b/src/modules/module_10700.c
@@ -152,38 +152,47 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
 
 char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
+  const u32 shared_size_scratch = (32 + 64 + 16); // LOCAL_VK u32 s_sc[FIXED_LOCAL_SIZE][PWMAXSZ4 + BLMAXSZ4 + AESSZ4];
+  const u32 shared_size_aes     = (5 * 1024);     // LOCAL_VK u32 s_te0[256];
+
   char *jit_build_options = NULL;
 
-  if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
+  if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
   {
-    u32 native_threads = 0;
+    hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", 1);
+  }
+  else
+  {
+    u32 overhead = 0;
 
-    if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
+    if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
     {
-      native_threads = 1;
-    }
-    else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
-    {
-      #if defined (__APPLE__)
+      // note we need to use device_param->device_local_mem_size - 4 because opencl jit returns with:
+      // Entry function '...' uses too much shared data (0xc004 bytes, 0xc000 max)
+      // on my development system. no clue where the 4 bytes are spent.
+      // I did some research on this and it seems to be related with the datatype.
+      // For example, if i used u8 instead, there's only 1 byte wasted.
 
-      native_threads = 32;
-
-      #else
-
-      if (device_param->device_local_mem_size < 49152)
+      if (device_param->is_opencl == true)
       {
-        native_threads = MIN (device_param->kernel_preferred_wgs_multiple, 32); // We can't just set 32, because Intel GPU need 8
+        overhead = 1;
       }
-      else
-      {
-        // to go over 48KiB, we need to use dynamic shared mem
-        native_threads = 49152 / 128;
-      }
-
-      #endif
     }
 
-    hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D _unroll", native_threads);
+    const u32 device_local_mem_size = MIN (device_param->device_local_mem_size, 48*1024);
+
+    u32 fixed_local_size = ((device_local_mem_size - overhead) - shared_size_aes) / shared_size_scratch;
+
+    if (user_options->kernel_threads_chgd == true)
+    {
+      fixed_local_size = user_options->kernel_threads;
+    }
+    else
+    {
+      if (fixed_local_size > device_param->kernel_preferred_wgs_multiple) fixed_local_size -= fixed_local_size % device_param->kernel_preferred_wgs_multiple;
+    }
+
+    hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D _unroll", fixed_local_size);
   }
 
   return jit_build_options;

From d2656e376d597515fa89e49b32c2d3a2121eb273 Mon Sep 17 00:00:00 2001
From: red <redongh@users.noreply.github.com>
Date: Sat, 5 Jul 2025 00:30:39 +0200
Subject: [PATCH 67/83] Update hashcat-python-plugin-development-guide.md

clarify location of custom Python scripts to be run without creating a dedicated module.
---
 docs/hashcat-python-plugin-development-guide.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/hashcat-python-plugin-development-guide.md b/docs/hashcat-python-plugin-development-guide.md
index 308b6d918..b93f2d66c 100644
--- a/docs/hashcat-python-plugin-development-guide.md
+++ b/docs/hashcat-python-plugin-development-guide.md
@@ -210,13 +210,13 @@ Notes:
 
 If you modify one of these plugin files, there's a trade-off: you won’t be able to contribute that code directly to the upstream Hashcat repository, since those files are meant to remain clean for demonstration purposes.
 
-To address this, the assimilation bridge provides a generic parameter that users can specify via the command line. In the case of the Python bridge, only the first parameter is used. You can override the Python script to be loaded using `--bridge-parameter1`:
+To address this, the assimilation bridge provides a generic parameter that users can specify via the command line. In the case of the Python bridge, only the first parameter is used. Using `--bridge-parameter1` allows you to override the Python script to be loaded:
 
 ```
-$ ./hashcat -m 73000 --bridge-parameter1 myimplementation.py hash.txt wordlist.txt ...
+$ ./hashcat -m 73000 --bridge-parameter1 ./Python/myimplementation.py hash.txt wordlist.txt ...
 ```
 
-This tells the Python bridge plugin to load `myimplementation.py` instead of the default `generic_hash_mp.py`. This approach is especially useful if you plan to contribute `myimplementation.py` to the upstream Hashcat repository. If you choose to stay within the generic mode, your Python code won’t have a dedicated hash mode, and you'll need to instruct users to use the `--bridge-parameter1` flag to load your implementation.
+This tells the Python bridge plugin to load `myimplementation.py` located in the local `Python` subdirectory instead of the default `generic_hash_mp.py`. This approach is especially useful if you plan to contribute `myimplementation.py` to the upstream Hashcat repository. If you choose to stay within the generic mode, your Python code won’t have a dedicated hash mode, and you'll need to instruct users to use the `--bridge-parameter1` flag to load your implementation.
 
 ### Design Tradeoffs and Format Considerations
 

From 381b2cac67d5d54d139baab318e7bc6a326b332d Mon Sep 17 00:00:00 2001
From: red <redongh@users.noreply.github.com>
Date: Sat, 5 Jul 2025 00:36:01 +0200
Subject: [PATCH 68/83] Update BUILD_WSL.md

Bump version of mingw-w64-x86_64-python-3.12.XX-X-any.pkg.tar.zst to latest.
---
 BUILD_WSL.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/BUILD_WSL.md b/BUILD_WSL.md
index cd5b6fd5b..957208381 100644
--- a/BUILD_WSL.md
+++ b/BUILD_WSL.md
@@ -19,7 +19,7 @@ cd win-iconv/
 cmake -D WIN_ICONV_BUILD_EXECUTABLE=OFF -D CMAKE_INSTALL_PREFIX=/opt/win-iconv-64 -D CMAKE_CXX_COMPILER=$(which x86_64-w64-mingw32-g++) -D CMAKE_C_COMPILER=$(which x86_64-w64-mingw32-gcc) -D CMAKE_SYSTEM_NAME=Windows
 sudo make install
 cd ../
-wget https://repo.msys2.org/mingw/mingw64/mingw-w64-x86_64-python-3.12.10-1-any.pkg.tar.zst
+wget https://repo.msys2.org/mingw/mingw64/mingw-w64-x86_64-python-3.12.11-1-any.pkg.tar.zst
 sudo mkdir /opt/win-python
 sudo tar --zstd -xf mingw-w64-x86_64-python-3.12.10-1-any.pkg.tar.zst -C /opt/win-python
 ```

From 9457c62ef0d58114d7a7097f322063e9eabd0d8e Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Sat, 5 Jul 2025 19:44:31 +0200
Subject: [PATCH 69/83] Removed redundant casts in inc_hash_blake2b.cl and
 inc_hash_blake2s.cl. Fixed parameter types in inc_hash_blake2b.cl and
 inc_hash_blake2s.cl for FINAL value. Added kernel code for -m 15400 to
 s04/s08/m04/m08, even if not needed, to help autotune find optimal workitem
 settings. Fixed a rare autotune case (e.g. in mode 18600) where threads_min
 was not a multiple of kernel_preferred_wgs_multiple, and changed it so that,
 as long as only threads_min is affected and not threads_max, we now ensure
 at least kernel_preferred_wgs_multiple. Improved autotune logic for best
 thread count: double thread count until reaching the device's preferred
 multiple, then increase in steps of that multiple while comparing efficiency
 vs. runtime, and select the configuration with best efficiency, not highest
 thread count. Always set funnelshift support to true for HIP devices, as it
 always reports false. Set minimum loop count to 250 for all VeraCrypt modes
 with PIM brute-force support.

---
 OpenCL/inc_hash_blake2b.cl    |  10 +-
 OpenCL/inc_hash_blake2s.cl    |  14 +-
 OpenCL/m15400_a3-optimized.cl | 333 +++++++++++++++++++++++++++++++++-
 OpenCL/m18600-pure.cl         |   2 +-
 src/autotune.c                |  41 ++++-
 src/backend.c                 |   2 +-
 src/modules/module_13711.c    |   9 +-
 src/modules/module_13712.c    |   9 +-
 src/modules/module_13713.c    |   9 +-
 src/modules/module_13721.c    |   9 +-
 src/modules/module_13722.c    |   9 +-
 src/modules/module_13723.c    |   9 +-
 src/modules/module_13731.c    |   9 +-
 src/modules/module_13732.c    |   9 +-
 src/modules/module_13733.c    |   9 +-
 src/modules/module_13751.c    |   9 +-
 src/modules/module_13752.c    |   9 +-
 src/modules/module_13753.c    |   9 +-
 src/modules/module_13771.c    |   9 +-
 src/modules/module_13772.c    |   9 +-
 src/modules/module_13773.c    |   9 +-
 src/modules/module_29411.c    |   9 +-
 src/modules/module_29412.c    |   9 +-
 src/modules/module_29413.c    |   9 +-
 src/modules/module_29421.c    |   9 +-
 src/modules/module_29422.c    |   9 +-
 src/modules/module_29423.c    |   9 +-
 src/modules/module_29431.c    |   9 +-
 src/modules/module_29432.c    |   9 +-
 src/modules/module_29433.c    |   9 +-
 src/modules/module_29451.c    |   9 +-
 src/modules/module_29452.c    |   9 +-
 src/modules/module_29453.c    |   9 +-
 src/modules/module_29471.c    |   9 +-
 src/modules/module_29472.c    |   9 +-
 src/modules/module_29473.c    |   9 +-
 36 files changed, 615 insertions(+), 57 deletions(-)

diff --git a/OpenCL/inc_hash_blake2b.cl b/OpenCL/inc_hash_blake2b.cl
index 03a44b3ff..b205b18a7 100644
--- a/OpenCL/inc_hash_blake2b.cl
+++ b/OpenCL/inc_hash_blake2b.cl
@@ -409,7 +409,7 @@ DECLSPEC void blake2b_update (PRIVATE_AS blake2b_ctx_t *ctx, PRIVATE_AS const u3
   u32 w6[4];
   u32 w7[4];
 
-  const int limit = (const int) len - 128; // int type needed, could be negative
+  const int limit = len - 128; // int type needed, could be negative
 
   int pos1;
   int pos4;
@@ -499,7 +499,7 @@ DECLSPEC void blake2b_update_global (PRIVATE_AS blake2b_ctx_t *ctx, GLOBAL_AS co
   u32 w6[4];
   u32 w7[4];
 
-  const int limit = (const int) len - 128; // int type needed, could be negative
+  const int limit = len - 128; // int type needed, could be negative
 
   int pos1;
   int pos4;
@@ -580,7 +580,7 @@ DECLSPEC void blake2b_update_global (PRIVATE_AS blake2b_ctx_t *ctx, GLOBAL_AS co
 
 DECLSPEC void blake2b_final (PRIVATE_AS blake2b_ctx_t *ctx)
 {
-  blake2b_transform (ctx->h, ctx->m, ctx->len, BLAKE2B_FINAL);
+  blake2b_transform (ctx->h, ctx->m, ctx->len, (u64) BLAKE2B_FINAL);
 }
 
 DECLSPEC void blake2b_transform_vector (PRIVATE_AS u64x *h, PRIVATE_AS const u64x *m, const u32x len, const u64 f0)
@@ -813,7 +813,7 @@ DECLSPEC void blake2b_update_vector (PRIVATE_AS blake2b_ctx_vector_t *ctx, PRIVA
   u32x w6[4];
   u32x w7[4];
 
-  const int limit = (const int) len - 128; // int type needed, could be negative
+  const int limit = len - 128; // int type needed, could be negative
 
   int pos1;
   int pos4;
@@ -894,5 +894,5 @@ DECLSPEC void blake2b_update_vector (PRIVATE_AS blake2b_ctx_vector_t *ctx, PRIVA
 
 DECLSPEC void blake2b_final_vector (PRIVATE_AS blake2b_ctx_vector_t *ctx)
 {
-  blake2b_transform_vector (ctx->h, ctx->m, (u32x) ctx->len, BLAKE2B_FINAL);
+  blake2b_transform_vector (ctx->h, ctx->m, (u32x) ctx->len, (u64) BLAKE2B_FINAL);
 }
diff --git a/OpenCL/inc_hash_blake2s.cl b/OpenCL/inc_hash_blake2s.cl
index 77ecc02fb..99d4389fb 100644
--- a/OpenCL/inc_hash_blake2s.cl
+++ b/OpenCL/inc_hash_blake2s.cl
@@ -322,7 +322,7 @@ DECLSPEC void blake2s_update (PRIVATE_AS blake2s_ctx_t *ctx, PRIVATE_AS const u3
   u32 w2[4];
   u32 w3[4];
 
-  const int limit = (const int) len - 64; // int type needed, could be negative
+  const int limit = len - 64; // int type needed, could be negative
 
   int pos1;
   int pos4;
@@ -376,7 +376,7 @@ DECLSPEC void blake2s_update_global (PRIVATE_AS blake2s_ctx_t *ctx, GLOBAL_AS co
   u32 w2[4];
   u32 w3[4];
 
-  const int limit = (const int) len - 64; // int type needed, could be negative
+  const int limit = len - 64; // int type needed, could be negative
 
   int pos1;
   int pos4;
@@ -516,7 +516,7 @@ DECLSPEC void blake2s_update_global_swap (PRIVATE_AS blake2s_ctx_t *ctx, GLOBAL_
   u32 w2[4];
   u32 w3[4];
 
-  const int limit = (const int) len - 64; // int type needed, could be negative
+  const int limit = len - 64; // int type needed, could be negative
 
   int pos1;
   int pos4;
@@ -597,13 +597,11 @@ DECLSPEC void blake2s_update_global_swap (PRIVATE_AS blake2s_ctx_t *ctx, GLOBAL_
   blake2s_update_64 (ctx, w0, w1, w2, w3, len - (u32) pos1);
 }
 
-
 DECLSPEC void blake2s_final (PRIVATE_AS blake2s_ctx_t *ctx)
 {
-  blake2s_transform (ctx->h, ctx->m, ctx->len, BLAKE2S_FINAL);
+  blake2s_transform (ctx->h, ctx->m, ctx->len, (u32) BLAKE2S_FINAL);
 }
 
-
 DECLSPEC void blake2s_hmac_init_64 (PRIVATE_AS blake2s_hmac_ctx_t *ctx, PRIVATE_AS const u32 *w0, PRIVATE_AS const u32 *w1, PRIVATE_AS const u32 *w2, PRIVATE_AS const u32 *w3)
 {
   u32 a0[4];
@@ -1158,7 +1156,7 @@ DECLSPEC void blake2s_update_vector (PRIVATE_AS blake2s_ctx_vector_t *ctx, PRIVA
   u32x w2[4];
   u32x w3[4];
 
-  const int limit = (const int) len - 64; // int type needed, could be negative
+  const int limit = len - 64; // int type needed, could be negative
 
   int pos1;
   int pos4;
@@ -1207,7 +1205,7 @@ DECLSPEC void blake2s_update_vector (PRIVATE_AS blake2s_ctx_vector_t *ctx, PRIVA
 
 DECLSPEC void blake2s_final_vector (PRIVATE_AS blake2s_ctx_vector_t *ctx)
 {
-  blake2s_transform_vector (ctx->h, ctx->m, (u32x) ctx->len, BLAKE2S_FINAL);
+  blake2s_transform_vector (ctx->h, ctx->m, (u32x) ctx->len, (u32) BLAKE2S_FINAL);
 }
 
 DECLSPEC void blake2s_hmac_init_vector_64 (PRIVATE_AS blake2s_hmac_ctx_vector_t *ctx, PRIVATE_AS const u32x *w0, PRIVATE_AS const u32x *w1, PRIVATE_AS const u32x *w2, PRIVATE_AS const u32x *w3)
diff --git a/OpenCL/m15400_a3-optimized.cl b/OpenCL/m15400_a3-optimized.cl
index a0f856bac..f60635a1a 100644
--- a/OpenCL/m15400_a3-optimized.cl
+++ b/OpenCL/m15400_a3-optimized.cl
@@ -238,12 +238,162 @@ DECLSPEC void chacha20_transform (PRIVATE_AS const u32x *w0, PRIVATE_AS const u3
 
 KERNEL_FQ KERNEL_FA void m15400_m04 (KERN_ATTR_VECTOR_ESALT (chacha20_t))
 {
-  // fixed size 32
+  /**
+   * modifier
+   */
+
+  const u64 gid = get_global_id (0);
+  const u64 lid = get_local_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  u32 w0[4];
+  u32 w1[4];
+
+  w0[0] = pws[gid].i[0];
+  w0[1] = pws[gid].i[1];
+  w0[2] = pws[gid].i[2];
+  w0[3] = pws[gid].i[3];
+  w1[0] = pws[gid].i[4];
+  w1[1] = pws[gid].i[5];
+  w1[2] = pws[gid].i[6];
+  w1[3] = pws[gid].i[7];
+
+  /**
+   * Salt prep
+   */
+
+  u32 iv[2];
+
+  iv[0] = esalt_bufs[DIGESTS_OFFSET_HOST].iv[0];
+  iv[1] = esalt_bufs[DIGESTS_OFFSET_HOST].iv[1];
+
+  u32 plain[2];
+
+  plain[0] = esalt_bufs[DIGESTS_OFFSET_HOST].plain[0];
+  plain[1] = esalt_bufs[DIGESTS_OFFSET_HOST].plain[1];
+
+  u32 position[2];
+
+  position[0] = esalt_bufs[DIGESTS_OFFSET_HOST].position[0];
+  position[1] = esalt_bufs[DIGESTS_OFFSET_HOST].position[1];
+
+  u32 offset = esalt_bufs[DIGESTS_OFFSET_HOST].offset;
+
+  /**
+   * loop
+   */
+
+  u32 w0l = pws[gid].i[0];
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+  {
+    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+    const u32x w0x = w0l | w0r;
+
+    u32x w0_t[4];
+    u32x w1_t[4];
+
+    w0_t[0] = w0x;
+    w0_t[1] = w0[1];
+    w0_t[2] = w0[2];
+    w0_t[3] = w0[3];
+    w1_t[0] = w1[0];
+    w1_t[1] = w1[1];
+    w1_t[2] = w1[2];
+    w1_t[3] = w1[3];
+
+    u32x digest[4] = { 0 };
+
+    chacha20_transform (w0_t, w1_t, position, offset, iv, plain, digest);
+
+    const u32x r0 = digest[0];
+    const u32x r1 = digest[1];
+    const u32x r2 = digest[2];
+    const u32x r3 = digest[3];
+
+    COMPARE_M_SIMD (r0, r1, r2, r3);
+  }
 }
 
 KERNEL_FQ KERNEL_FA void m15400_m08 (KERN_ATTR_VECTOR_ESALT (chacha20_t))
 {
-  // fixed size 32
+  /**
+   * modifier
+   */
+
+  const u64 gid = get_global_id (0);
+  const u64 lid = get_local_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  u32 w0[4];
+  u32 w1[4];
+
+  w0[0] = pws[gid].i[0];
+  w0[1] = pws[gid].i[1];
+  w0[2] = pws[gid].i[2];
+  w0[3] = pws[gid].i[3];
+  w1[0] = pws[gid].i[4];
+  w1[1] = pws[gid].i[5];
+  w1[2] = pws[gid].i[6];
+  w1[3] = pws[gid].i[7];
+
+  /**
+   * Salt prep
+   */
+
+  u32 iv[2];
+
+  iv[0] = esalt_bufs[DIGESTS_OFFSET_HOST].iv[0];
+  iv[1] = esalt_bufs[DIGESTS_OFFSET_HOST].iv[1];
+
+  u32 plain[2];
+
+  plain[0] = esalt_bufs[DIGESTS_OFFSET_HOST].plain[0];
+  plain[1] = esalt_bufs[DIGESTS_OFFSET_HOST].plain[1];
+
+  u32 position[2];
+
+  position[0] = esalt_bufs[DIGESTS_OFFSET_HOST].position[0];
+  position[1] = esalt_bufs[DIGESTS_OFFSET_HOST].position[1];
+
+  u32 offset = esalt_bufs[DIGESTS_OFFSET_HOST].offset;
+
+  /**
+   * loop
+   */
+
+  u32 w0l = pws[gid].i[0];
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+  {
+    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+    const u32x w0x = w0l | w0r;
+
+    u32x w0_t[4];
+    u32x w1_t[4];
+
+    w0_t[0] = w0x;
+    w0_t[1] = w0[1];
+    w0_t[2] = w0[2];
+    w0_t[3] = w0[3];
+    w1_t[0] = w1[0];
+    w1_t[1] = w1[1];
+    w1_t[2] = w1[2];
+    w1_t[3] = w1[3];
+
+    u32x digest[4] = { 0 };
+
+    chacha20_transform (w0_t, w1_t, position, offset, iv, plain, digest);
+
+    const u32x r0 = digest[0];
+    const u32x r1 = digest[1];
+    const u32x r2 = digest[2];
+    const u32x r3 = digest[3];
+
+    COMPARE_M_SIMD (r0, r1, r2, r3);
+  }
 }
 
 KERNEL_FQ KERNEL_FA void m15400_m16 (KERN_ATTR_VECTOR_ESALT (chacha20_t))
@@ -328,12 +478,187 @@ KERNEL_FQ KERNEL_FA void m15400_m16 (KERN_ATTR_VECTOR_ESALT (chacha20_t))
 
 KERNEL_FQ KERNEL_FA void m15400_s04 (KERN_ATTR_VECTOR_ESALT (chacha20_t))
 {
-  // fixed size 32
+  /**
+   * modifier
+   */
+
+  const u64 gid = get_global_id (0);
+  const u64 lid = get_local_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  u32 w0[4];
+  u32 w1[4];
+
+  w0[0] = pws[gid].i[0];
+  w0[1] = pws[gid].i[1];
+  w0[2] = pws[gid].i[2];
+  w0[3] = pws[gid].i[3];
+  w1[0] = pws[gid].i[4];
+  w1[1] = pws[gid].i[5];
+  w1[2] = pws[gid].i[6];
+  w1[3] = pws[gid].i[7];
+
+  /**
+   * Salt prep
+   */
+
+  u32 iv[2];
+
+  iv[0] = esalt_bufs[DIGESTS_OFFSET_HOST].iv[0];
+  iv[1] = esalt_bufs[DIGESTS_OFFSET_HOST].iv[1];
+
+  u32 plain[2];
+
+  plain[0] = esalt_bufs[DIGESTS_OFFSET_HOST].plain[0];
+  plain[1] = esalt_bufs[DIGESTS_OFFSET_HOST].plain[1];
+
+  u32 position[2];
+
+  position[0] = esalt_bufs[DIGESTS_OFFSET_HOST].position[0];
+  position[1] = esalt_bufs[DIGESTS_OFFSET_HOST].position[1];
+
+  u32 offset = esalt_bufs[DIGESTS_OFFSET_HOST].offset;
+
+  /**
+   * digest
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
+  };
+
+  /**
+   * loop
+   */
+
+  u32 w0l = pws[gid].i[0];
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+  {
+    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+    const u32x w0x = w0l | w0r;
+
+    u32x w0_t[4];
+    u32x w1_t[4];
+
+    w0_t[0] = w0x;
+    w0_t[1] = w0[1];
+    w0_t[2] = w0[2];
+    w0_t[3] = w0[3];
+    w1_t[0] = w1[0];
+    w1_t[1] = w1[1];
+    w1_t[2] = w1[2];
+    w1_t[3] = w1[3];
+
+    u32x digest[4] = { 0 };
+
+    chacha20_transform (w0_t, w1_t, position, offset, iv, plain, digest);
+
+    const u32x r0 = digest[0];
+    const u32x r1 = digest[1];
+    const u32x r2 = digest[2];
+    const u32x r3 = digest[3];
+
+    COMPARE_S_SIMD (r0, r1, r2, r3);
+  }
 }
 
+
 KERNEL_FQ KERNEL_FA void m15400_s08 (KERN_ATTR_VECTOR_ESALT (chacha20_t))
 {
-  // fixed size 32
+  /**
+   * modifier
+   */
+
+  const u64 gid = get_global_id (0);
+  const u64 lid = get_local_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  u32 w0[4];
+  u32 w1[4];
+
+  w0[0] = pws[gid].i[0];
+  w0[1] = pws[gid].i[1];
+  w0[2] = pws[gid].i[2];
+  w0[3] = pws[gid].i[3];
+  w1[0] = pws[gid].i[4];
+  w1[1] = pws[gid].i[5];
+  w1[2] = pws[gid].i[6];
+  w1[3] = pws[gid].i[7];
+
+  /**
+   * Salt prep
+   */
+
+  u32 iv[2];
+
+  iv[0] = esalt_bufs[DIGESTS_OFFSET_HOST].iv[0];
+  iv[1] = esalt_bufs[DIGESTS_OFFSET_HOST].iv[1];
+
+  u32 plain[2];
+
+  plain[0] = esalt_bufs[DIGESTS_OFFSET_HOST].plain[0];
+  plain[1] = esalt_bufs[DIGESTS_OFFSET_HOST].plain[1];
+
+  u32 position[2];
+
+  position[0] = esalt_bufs[DIGESTS_OFFSET_HOST].position[0];
+  position[1] = esalt_bufs[DIGESTS_OFFSET_HOST].position[1];
+
+  u32 offset = esalt_bufs[DIGESTS_OFFSET_HOST].offset;
+
+  /**
+   * digest
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
+  };
+
+  /**
+   * loop
+   */
+
+  u32 w0l = pws[gid].i[0];
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+  {
+    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+    const u32x w0x = w0l | w0r;
+
+    u32x w0_t[4];
+    u32x w1_t[4];
+
+    w0_t[0] = w0x;
+    w0_t[1] = w0[1];
+    w0_t[2] = w0[2];
+    w0_t[3] = w0[3];
+    w1_t[0] = w1[0];
+    w1_t[1] = w1[1];
+    w1_t[2] = w1[2];
+    w1_t[3] = w1[3];
+
+    u32x digest[4] = { 0 };
+
+    chacha20_transform (w0_t, w1_t, position, offset, iv, plain, digest);
+
+    const u32x r0 = digest[0];
+    const u32x r1 = digest[1];
+    const u32x r2 = digest[2];
+    const u32x r3 = digest[3];
+
+    COMPARE_S_SIMD (r0, r1, r2, r3);
+  }
 }
 
 KERNEL_FQ KERNEL_FA void m15400_s16 (KERN_ATTR_VECTOR_ESALT (chacha20_t))
diff --git a/OpenCL/m18600-pure.cl b/OpenCL/m18600-pure.cl
index 54bba75d7..144df2063 100644
--- a/OpenCL/m18600-pure.cl
+++ b/OpenCL/m18600-pure.cl
@@ -636,7 +636,7 @@ KERNEL_FQ KERNEL_FA void m18600_loop (KERN_ATTR_TMPS_ESALT (odf11_tmp_t, odf11_t
   }
 }
 
-KERNEL_FQ KERNEL_FA void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE_COMP) m18600_comp (KERN_ATTR_TMPS_ESALT (odf11_tmp_t, odf11_t))
+KERNEL_FQ KERNEL_FA FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE_COMP) void m18600_comp (KERN_ATTR_TMPS_ESALT (odf11_tmp_t, odf11_t))
 {
   const u64 gid = get_global_id (0);
   const u64 lid = get_local_id (0);
diff --git a/src/autotune.c b/src/autotune.c
index a599e65be..4a4dc0d85 100644
--- a/src/autotune.c
+++ b/src/autotune.c
@@ -333,6 +333,21 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
     // v7 autotuner is a lot more straight forward
 
+    if (kernel_threads_min < kernel_threads_max)
+    {
+      // there could be a situation, like in 18600, where we have a thread_min which is not a multiple of
+      // kernel_preferred_wgs_multiple. As long as it's only a threads_min, but not a threads_max, we
+      // should stick to at least kernel_preferred_wgs_multiple
+
+      if (kernel_threads_min % device_param->kernel_preferred_wgs_multiple)
+      {
+        if ((device_param->kernel_preferred_wgs_multiple >= kernel_threads_min) && (device_param->kernel_preferred_wgs_multiple <= kernel_threads_max))
+        {
+          kernel_threads = device_param->kernel_preferred_wgs_multiple;
+        }
+      }
+    }
+
     if (hashes && hashes->st_salts_buf)
     {
       u32 start = kernel_loops_max;
@@ -356,15 +371,15 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
     for (u32 kernel_loops_test = kernel_loops; kernel_loops_test <= kernel_loops_max; kernel_loops_test <<= 1)
     {
-      double exec_msec = try_run_times (hashcat_ctx, device_param, kernel_accel_min, kernel_loops_test, kernel_threads_min, 2);
+      double exec_msec = try_run_times (hashcat_ctx, device_param, kernel_accel, kernel_loops_test, kernel_threads, 2);
 
-      //printf ("loop %f %u %u %u\n", exec_msec, kernel_accel_min, kernel_loops_test, kernel_threads_min);
+      //printf ("loop %f %u %u %u\n", exec_msec, kernel_accel, kernel_loops_test, kernel_threads);
       if (exec_msec > target_msec) break;
 
       // we want a little room for threads to play with so not full target_msec
       // but of course only if we are going to make use of that :)
 
-      if ((kernel_accel_min < kernel_accel_max) || (kernel_threads_min < kernel_threads_max))
+      if ((kernel_accel < kernel_accel_max) || (kernel_threads < kernel_threads_max))
       {
         if (exec_msec > target_msec / 8) break;
 
@@ -378,11 +393,14 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
       kernel_loops = kernel_loops_test;
     }
 
-    for (u32 kernel_threads_test = kernel_threads_min; kernel_threads_test <= kernel_threads_max; kernel_threads_test <<= 1)
-    {
-      double exec_msec = try_run_times (hashcat_ctx, device_param, kernel_accel_min, kernel_loops, kernel_threads_test, 2);
+    double exec_msec_init = try_run_times (hashcat_ctx, device_param, kernel_accel, kernel_loops, kernel_threads, 2);
+
+    float threads_eff_best = exec_msec_init / kernel_threads;
+
+    for (u32 kernel_threads_test = kernel_threads; kernel_threads_test <= kernel_threads_max; kernel_threads_test = (kernel_threads_test < device_param->kernel_preferred_wgs_multiple) ? kernel_threads_test << 1 : kernel_threads_test + device_param->kernel_preferred_wgs_multiple)
+    {
+      double exec_msec = try_run_times (hashcat_ctx, device_param, kernel_accel, kernel_loops, kernel_threads_test, 2);
 
-      //printf ("threads %f %u %u %u\n", exec_msec, kernel_accel_min, kernel_loops, kernel_threads_test);
       if (exec_msec > target_msec) break;
 
       if (kernel_threads >= 32)
@@ -392,7 +410,14 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
         if (exec_msec > target_msec / 8) break;
       }
 
-      kernel_threads = kernel_threads_test;
+      float threads_eff_cur = exec_msec / kernel_threads_test;
+
+      if ((threads_eff_cur * 1.05) < threads_eff_best)
+      {
+        threads_eff_best = threads_eff_cur;
+
+        kernel_threads = kernel_threads_test;
+      }
     }
 
     #define STEPS_CNT 12
diff --git a/src/backend.c b/src/backend.c
index e324a2e01..789dcd7df 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -6438,7 +6438,7 @@ static void backend_ctx_devices_init_hip (hashcat_ctx_t *hashcat_ctx, int *virth
       device_param->has_lop3  = false;
       device_param->has_mov64 = false;
       device_param->has_prmt  = false;
-      device_param->has_shfw  = prop.arch.hasFunnelShift;
+      device_param->has_shfw  = true; // always reports false : prop.arch.hasFunnelShift;
 
       // device_available_mem
 
diff --git a/src/modules/module_13711.c b/src/modules/module_13711.c
index ff18b6c52..9e72c4b0f 100644
--- a/src/modules/module_13711.c
+++ b/src/modules/module_13711.c
@@ -129,6 +129,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_min = 250; // fixed lower bound, no argument is consulted; kernel_loops_max below is 1000
+
+  return kernel_loops_min;
+}
+
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@@ -344,7 +351,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
-  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
   module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
diff --git a/src/modules/module_13712.c b/src/modules/module_13712.c
index b39eecfe7..26ad43579 100644
--- a/src/modules/module_13712.c
+++ b/src/modules/module_13712.c
@@ -146,6 +146,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_min = 250; // fixed lower bound, no argument is consulted; kernel_loops_max below is 1000
+
+  return kernel_loops_min;
+}
+
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@@ -361,7 +368,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
-  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
   module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
diff --git a/src/modules/module_13713.c b/src/modules/module_13713.c
index d3c8d5fa4..7c67d69e5 100644
--- a/src/modules/module_13713.c
+++ b/src/modules/module_13713.c
@@ -146,6 +146,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_min = 250; // fixed lower bound, no argument is consulted; kernel_loops_max below is 1000
+
+  return kernel_loops_min;
+}
+
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@@ -361,7 +368,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
-  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
   module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
diff --git a/src/modules/module_13721.c b/src/modules/module_13721.c
index b6c937875..14a28165d 100644
--- a/src/modules/module_13721.c
+++ b/src/modules/module_13721.c
@@ -131,6 +131,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_min = 250; // fixed lower bound, no argument is consulted; kernel_loops_max below is 1000
+
+  return kernel_loops_min;
+}
+
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@@ -346,7 +353,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
-  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
   module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
diff --git a/src/modules/module_13722.c b/src/modules/module_13722.c
index 3c92b2091..7850433b3 100644
--- a/src/modules/module_13722.c
+++ b/src/modules/module_13722.c
@@ -148,6 +148,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_min = 250; // fixed lower bound, no argument is consulted; kernel_loops_max below is 1000
+
+  return kernel_loops_min;
+}
+
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@@ -363,7 +370,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
-  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
   module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
diff --git a/src/modules/module_13723.c b/src/modules/module_13723.c
index dd8f513f7..fae3d81ec 100644
--- a/src/modules/module_13723.c
+++ b/src/modules/module_13723.c
@@ -148,6 +148,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_min = 250; // fixed lower bound, no argument is consulted; kernel_loops_max below is 1000
+
+  return kernel_loops_min;
+}
+
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@@ -363,7 +370,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
-  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
   module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
diff --git a/src/modules/module_13731.c b/src/modules/module_13731.c
index 27f0e5033..a3f175fd8 100644
--- a/src/modules/module_13731.c
+++ b/src/modules/module_13731.c
@@ -129,6 +129,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_min = 250; // fixed lower bound, no argument is consulted; kernel_loops_max below is 1000
+
+  return kernel_loops_min;
+}
+
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@@ -344,7 +351,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
-  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
   module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
diff --git a/src/modules/module_13732.c b/src/modules/module_13732.c
index e78dbaa83..4ac4cc8e9 100644
--- a/src/modules/module_13732.c
+++ b/src/modules/module_13732.c
@@ -129,6 +129,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_min = 250; // fixed lower bound, no argument is consulted; kernel_loops_max below is 1000
+
+  return kernel_loops_min;
+}
+
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@@ -344,7 +351,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
-  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
   module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
diff --git a/src/modules/module_13733.c b/src/modules/module_13733.c
index 1d7c36628..29da2b0ca 100644
--- a/src/modules/module_13733.c
+++ b/src/modules/module_13733.c
@@ -129,6 +129,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_min = 250; // fixed lower bound, no argument is consulted; kernel_loops_max below is 1000
+
+  return kernel_loops_min;
+}
+
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@@ -344,7 +351,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
-  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
   module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
diff --git a/src/modules/module_13751.c b/src/modules/module_13751.c
index ee27acfd6..d2d0a50c2 100644
--- a/src/modules/module_13751.c
+++ b/src/modules/module_13751.c
@@ -166,6 +166,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_min = 250; // fixed lower bound, no argument is consulted; kernel_loops_max below is 1000
+
+  return kernel_loops_min;
+}
+
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@@ -381,7 +388,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
-  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
   module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
diff --git a/src/modules/module_13752.c b/src/modules/module_13752.c
index 095758dd2..bf51b8d7b 100644
--- a/src/modules/module_13752.c
+++ b/src/modules/module_13752.c
@@ -166,6 +166,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_min = 250; // fixed lower bound, no argument is consulted; kernel_loops_max below is 1000
+
+  return kernel_loops_min;
+}
+
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@@ -381,7 +388,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
-  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
   module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
diff --git a/src/modules/module_13753.c b/src/modules/module_13753.c
index 4eee98625..fadb4ffed 100644
--- a/src/modules/module_13753.c
+++ b/src/modules/module_13753.c
@@ -166,6 +166,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_min = 250; // fixed lower bound, no argument is consulted; kernel_loops_max below is 1000
+
+  return kernel_loops_min;
+}
+
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@@ -381,7 +388,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
-  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
   module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
diff --git a/src/modules/module_13771.c b/src/modules/module_13771.c
index b753d171f..32951f7c2 100644
--- a/src/modules/module_13771.c
+++ b/src/modules/module_13771.c
@@ -150,6 +150,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_min = 250; // fixed lower bound, no argument is consulted; kernel_loops_max below is 1000
+
+  return kernel_loops_min;
+}
+
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@@ -365,7 +372,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
-  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
   module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
diff --git a/src/modules/module_13772.c b/src/modules/module_13772.c
index 6494ee637..e6af92bfb 100644
--- a/src/modules/module_13772.c
+++ b/src/modules/module_13772.c
@@ -150,6 +150,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_min = 250; // fixed lower bound, no argument is consulted; kernel_loops_max below is 1000
+
+  return kernel_loops_min;
+}
+
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@@ -365,7 +372,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
-  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
   module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
diff --git a/src/modules/module_13773.c b/src/modules/module_13773.c
index 6f2740347..32b6ddfb1 100644
--- a/src/modules/module_13773.c
+++ b/src/modules/module_13773.c
@@ -150,6 +150,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_min = 250; // fixed lower bound, no argument is consulted; kernel_loops_max below is 1000
+
+  return kernel_loops_min;
+}
+
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@@ -365,7 +372,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
-  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
   module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
diff --git a/src/modules/module_29411.c b/src/modules/module_29411.c
index a3e7160eb..60fa66f6f 100644
--- a/src/modules/module_29411.c
+++ b/src/modules/module_29411.c
@@ -114,6 +114,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_min = 250; // fixed lower bound, no argument is consulted; kernel_loops_max below is 1000
+
+  return kernel_loops_min;
+}
+
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@@ -356,7 +363,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
-  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
   module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
diff --git a/src/modules/module_29412.c b/src/modules/module_29412.c
index 969e6d355..df64b2a6c 100644
--- a/src/modules/module_29412.c
+++ b/src/modules/module_29412.c
@@ -114,6 +114,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_min = 250; // fixed lower bound, no argument is consulted; kernel_loops_max below is 1000
+
+  return kernel_loops_min;
+}
+
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@@ -356,7 +363,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
-  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
   module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
diff --git a/src/modules/module_29413.c b/src/modules/module_29413.c
index 935d16dc5..7f455767d 100644
--- a/src/modules/module_29413.c
+++ b/src/modules/module_29413.c
@@ -131,6 +131,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_min = 250; // fixed lower bound, no argument is consulted; kernel_loops_max below is 1000
+
+  return kernel_loops_min;
+}
+
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@@ -373,7 +380,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
-  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
   module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
diff --git a/src/modules/module_29421.c b/src/modules/module_29421.c
index 4c0e94163..69e4ba2d5 100644
--- a/src/modules/module_29421.c
+++ b/src/modules/module_29421.c
@@ -130,6 +130,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_min = 250; // fixed lower bound, no argument is consulted; kernel_loops_max below is 1000
+
+  return kernel_loops_min;
+}
+
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@@ -372,7 +379,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
-  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
   module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
diff --git a/src/modules/module_29422.c b/src/modules/module_29422.c
index d32d1471e..f3b4cc98c 100644
--- a/src/modules/module_29422.c
+++ b/src/modules/module_29422.c
@@ -141,6 +141,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_min = 250;
+
+  return kernel_loops_min;
+}
+
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@@ -383,7 +390,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
-  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
   module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
diff --git a/src/modules/module_29423.c b/src/modules/module_29423.c
index 988bc8a42..a038dd7a4 100644
--- a/src/modules/module_29423.c
+++ b/src/modules/module_29423.c
@@ -141,6 +141,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_min = 250;
+
+  return kernel_loops_min;
+}
+
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@@ -383,7 +390,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
-  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
   module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
diff --git a/src/modules/module_29431.c b/src/modules/module_29431.c
index a22c24e3d..8555b19b7 100644
--- a/src/modules/module_29431.c
+++ b/src/modules/module_29431.c
@@ -131,6 +131,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_min = 250;
+
+  return kernel_loops_min;
+}
+
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@@ -373,7 +380,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
-  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
   module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
diff --git a/src/modules/module_29432.c b/src/modules/module_29432.c
index 563b254c0..b9c54a1ca 100644
--- a/src/modules/module_29432.c
+++ b/src/modules/module_29432.c
@@ -131,6 +131,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_min = 250;
+
+  return kernel_loops_min;
+}
+
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@@ -373,7 +380,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
-  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
   module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
diff --git a/src/modules/module_29433.c b/src/modules/module_29433.c
index 90e2033c9..519ff8fba 100644
--- a/src/modules/module_29433.c
+++ b/src/modules/module_29433.c
@@ -131,6 +131,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_min = 250;
+
+  return kernel_loops_min;
+}
+
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@@ -373,7 +380,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
-  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
   module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
diff --git a/src/modules/module_29451.c b/src/modules/module_29451.c
index fe3c0737f..94d902273 100644
--- a/src/modules/module_29451.c
+++ b/src/modules/module_29451.c
@@ -151,6 +151,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_min = 250;
+
+  return kernel_loops_min;
+}
+
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@@ -393,7 +400,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
-  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
   module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
diff --git a/src/modules/module_29452.c b/src/modules/module_29452.c
index 842f36f47..1c5ac7f1b 100644
--- a/src/modules/module_29452.c
+++ b/src/modules/module_29452.c
@@ -151,6 +151,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_min = 250;
+
+  return kernel_loops_min;
+}
+
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@@ -393,7 +400,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
-  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
   module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
diff --git a/src/modules/module_29453.c b/src/modules/module_29453.c
index 6162337de..74322f135 100644
--- a/src/modules/module_29453.c
+++ b/src/modules/module_29453.c
@@ -151,6 +151,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_min = 250;
+
+  return kernel_loops_min;
+}
+
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@@ -393,7 +400,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
-  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
   module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
diff --git a/src/modules/module_29471.c b/src/modules/module_29471.c
index 18d50d294..a2bfe7996 100644
--- a/src/modules/module_29471.c
+++ b/src/modules/module_29471.c
@@ -143,6 +143,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_min = 250;
+
+  return kernel_loops_min;
+}
+
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@@ -385,7 +392,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
-  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
   module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
diff --git a/src/modules/module_29472.c b/src/modules/module_29472.c
index a4c0c275e..9a1614135 100644
--- a/src/modules/module_29472.c
+++ b/src/modules/module_29472.c
@@ -143,6 +143,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_min = 250;
+
+  return kernel_loops_min;
+}
+
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@@ -385,7 +392,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
-  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
   module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
diff --git a/src/modules/module_29473.c b/src/modules/module_29473.c
index 744c1ac6d..b767dad92 100644
--- a/src/modules/module_29473.c
+++ b/src/modules/module_29473.c
@@ -143,6 +143,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
   return tmp_size;
 }
 
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_min = 250;
+
+  return kernel_loops_min;
+}
+
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@@ -385,7 +392,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
-  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
   module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;

From 9f3d7711378f3d1a722de9e1c32e53427aa6150e Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Sun, 6 Jul 2025 07:59:56 +0200
Subject: [PATCH 70/83] fix build error on src/user_options.c

---
 src/user_options.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/user_options.c b/src/user_options.c
index 2647ac46a..cb27a9b47 100644
--- a/src/user_options.c
+++ b/src/user_options.c
@@ -1955,8 +1955,7 @@ void user_options_preprocess (hashcat_ctx_t *hashcat_ctx)
    || user_options->progress_only    == true
    || user_options->identify         == true
    || user_options->usage             > 0
-   || 
-      > 0
+   || user_options->hash_info         > 0
    || user_options->backend_info      > 0)
   {
     user_options->hwmon               = false;

From aa10bcf80e8d5e242be5e2a6017708d5da629b06 Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Sun, 6 Jul 2025 08:06:24 +0200
Subject: [PATCH 71/83] update remaining user_options->hash_info checks

---
 src/bridges.c       | 12 ++++++------
 src/hashes.c        |  2 +-
 src/mpsp.c          |  2 +-
 src/outfile_check.c |  4 ++--
 src/potfile.c       |  2 +-
 src/restore.c       |  2 +-
 src/straight.c      |  2 +-
 src/tuningdb.c      |  2 +-
 src/wordlist.c      |  2 +-
 9 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/src/bridges.c b/src/bridges.c
index 9b37aa25a..3df210f80 100644
--- a/src/bridges.c
+++ b/src/bridges.c
@@ -87,12 +87,12 @@ bool bridges_init (hashcat_ctx_t *hashcat_ctx)
   user_options_t  *user_options = hashcat_ctx->user_options;
   hashconfig_t    *hashconfig   = hashcat_ctx->hashconfig;
 
-  if (user_options->hash_info    == true) return true;
+  if (user_options->backend_info  > 0)    return true;
+  if (user_options->hash_info     > 0)    return true;
+  if (user_options->usage         > 0)    return true;
   if (user_options->left         == true) return true;
   if (user_options->show         == true) return true;
-  if (user_options->usage         > 0)    return true;
   if (user_options->version      == true) return true;
-  if (user_options->backend_info  > 0)    return true;
 
   // There is a problem here. At this point, hashconfig is not yet initialized.
   // This is because initializing hashconfig requires the module to be loaded,
@@ -241,12 +241,12 @@ bool bridges_salt_prepare (hashcat_ctx_t *hashcat_ctx)
   hashes_t        *hashes       = hashcat_ctx->hashes;
   user_options_t  *user_options = hashcat_ctx->user_options;
 
-  if (user_options->hash_info    == true) return true;
+  if (user_options->backend_info  > 0)    return true;
+  if (user_options->hash_info     > 0)    return true;
+  if (user_options->usage         > 0)    return true;
   if (user_options->left         == true) return true;
   if (user_options->show         == true) return true;
-  if (user_options->usage         > 0)    return true;
   if (user_options->version      == true) return true;
-  if (user_options->backend_info  > 0)    return true;
 
   if (bridge_ctx->enabled == false) return true;
 
diff --git a/src/hashes.c b/src/hashes.c
index 40d94941f..767c30824 100644
--- a/src/hashes.c
+++ b/src/hashes.c
@@ -1133,7 +1133,7 @@ int hashes_init_stage1 (hashcat_ctx_t *hashcat_ctx)
 
     hashes_cnt = 1;
   }
-  else if (user_options->hash_info == true)
+  else if (user_options->hash_info > 0)
   {
   }
   else if (user_options->keyspace == true)
diff --git a/src/mpsp.c b/src/mpsp.c
index 3efe345d7..0d2ae9ae0 100644
--- a/src/mpsp.c
+++ b/src/mpsp.c
@@ -1403,8 +1403,8 @@ int mask_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
   if (user_options->usage         > 0)    return 0;
   if (user_options->backend_info  > 0)    return 0;
+  if (user_options->hash_info     > 0)    return 0;
 
-  if (user_options->hash_info    == true) return 0;
   if (user_options->left         == true) return 0;
   if (user_options->show         == true) return 0;
   if (user_options->version      == true) return 0;
diff --git a/src/outfile_check.c b/src/outfile_check.c
index e681b7263..afee8ddb8 100644
--- a/src/outfile_check.c
+++ b/src/outfile_check.c
@@ -343,13 +343,13 @@ int outcheck_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
   outcheck_ctx->enabled = false;
 
+  if (user_options->backend_info   > 0)    return 0;
+  if (user_options->hash_info      > 0)    return 0;
   if (user_options->keyspace      == true) return 0;
   if (user_options->benchmark     == true) return 0;
-  if (user_options->hash_info     == true) return 0;
   if (user_options->speed_only    == true) return 0;
   if (user_options->progress_only == true) return 0;
   if (user_options->identify      == true) return 0;
-  if (user_options->backend_info   > 0)    return 0;
 
   if (hashconfig->outfile_check_disable == true) return 0;
   if (user_options->outfile_check_timer == 0)    return 0;
diff --git a/src/potfile.c b/src/potfile.c
index afafca2f1..9a87ed027 100644
--- a/src/potfile.c
+++ b/src/potfile.c
@@ -115,9 +115,9 @@ int potfile_init (hashcat_ctx_t *hashcat_ctx)
 
   if (user_options->usage            > 0)     return 0;
   if (user_options->backend_info     > 0)     return 0;
+  if (user_options->hash_info        > 0)     return 0;
 
   if (user_options->benchmark       == true)  return 0;
-  if (user_options->hash_info       == true)  return 0;
   if (user_options->keyspace        == true)  return 0;
   if (user_options->stdout_flag     == true)  return 0;
   if (user_options->speed_only      == true)  return 0;
diff --git a/src/restore.c b/src/restore.c
index 2ca95ba92..e5c141bfc 100644
--- a/src/restore.c
+++ b/src/restore.c
@@ -312,9 +312,9 @@ int restore_ctx_init (hashcat_ctx_t *hashcat_ctx, int argc, char **argv)
 
   if (user_options->usage            > 0)     return 0;
   if (user_options->backend_info     > 0)     return 0;
+  if (user_options->hash_info        > 0)     return 0;
 
   if (user_options->benchmark       == true)  return 0;
-  if (user_options->hash_info       == true)  return 0;
   if (user_options->keyspace        == true)  return 0;
   if (user_options->left            == true)  return 0;
   if (user_options->show            == true)  return 0;
diff --git a/src/straight.c b/src/straight.c
index 36d3a8eb8..58212be52 100644
--- a/src/straight.c
+++ b/src/straight.c
@@ -264,11 +264,11 @@ int straight_ctx_init (hashcat_ctx_t *hashcat_ctx)
 
   if (user_options->usage         > 0)    return 0;
   if (user_options->backend_info  > 0)    return 0;
+  if (user_options->hash_info     > 0)    return 0;
 
   if (user_options->left         == true) return 0;
   if (user_options->show         == true) return 0;
   if (user_options->version      == true) return 0;
-  if (user_options->hash_info    == true) return 0;
 
   if (user_options->attack_mode  == ATTACK_MODE_BF) return 0;
 
diff --git a/src/tuningdb.c b/src/tuningdb.c
index 41d3795aa..2c507feb7 100644
--- a/src/tuningdb.c
+++ b/src/tuningdb.c
@@ -56,8 +56,8 @@ int tuning_db_init (hashcat_ctx_t *hashcat_ctx)
 
   if (user_options->usage         > 0)    return 0;
   if (user_options->backend_info  > 0)    return 0;
+  if (user_options->hash_info     > 0)    return 0;
 
-  if (user_options->hash_info    == true) return 0;
   if (user_options->keyspace     == true) return 0;
   if (user_options->left         == true) return 0;
   if (user_options->show         == true) return 0;
diff --git a/src/wordlist.c b/src/wordlist.c
index 1776b4885..bfe09de89 100644
--- a/src/wordlist.c
+++ b/src/wordlist.c
@@ -697,9 +697,9 @@ int wl_data_init (hashcat_ctx_t *hashcat_ctx)
 
   if (user_options->usage         > 0)    return 0;
   if (user_options->backend_info  > 0)    return 0;
+  if (user_options->hash_info     > 0)    return 0;
 
   if (user_options->benchmark    == true) return 0;
-  if (user_options->hash_info    == true) return 0;
   if (user_options->left         == true) return 0;
   if (user_options->version      == true) return 0;
 

From 0576c4149100839ce87c0ac8fe8ad3d0836b2ad0 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Sun, 6 Jul 2025 10:14:20 +0200
Subject: [PATCH 72/83] Updated autotune to set initial values for accel,
 threads, and loop based on theoretical assumptions, with the idea for more
 accurate early results from measured test runs. Updated autotune to use the
 iteration count of the first user-defined hash instead of the self-test hash
 for slow hash tuning, assuming consistency across the hash list. Updated
 autotune to prefer best-efficiency thread count only if it is at least 6%
 better than the max thread count, improving consistency in thread and accel
 values while allowing exceptions for special modes like 18600. Changed
 default theoretical free-memory estimate: the reduction applied to max memory
 was raised from 20% to 34%. This happens only when the runtime/OS cannot
 provide low-level free memory data. Applied the same logic, using the
 --backend-devices-keepfree percentage, to host memory during early setup, when
 hashcat auto-reduces
 thread and accel counts to stay within limits, and that per compute device.
 Changed terminal output from "Host memory required for this attack: ..." to
 "Host memory allocated for this attack: ...", and added free host memory as
 reference.

---
 src/autotune.c | 85 +++++++++++++++++++++++++++++++++++++++-----------
 src/backend.c  | 17 ++++++++--
 src/main.c     | 12 ++++++-
 3 files changed, 92 insertions(+), 22 deletions(-)

diff --git a/src/autotune.c b/src/autotune.c
index 4a4dc0d85..7938fb259 100644
--- a/src/autotune.c
+++ b/src/autotune.c
@@ -332,6 +332,22 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
     }
 
     // v7 autotuner is a lot more straight forward
+    // we start with some purely theoretical values as a base, then move on to some meassured tests
+
+    if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+    {
+      if (kernel_accel_min < kernel_accel_max)
+      {
+        // let's also do some minimal accel, this is only to improve early meassurements taken with try_run()
+
+        const u32 kernel_accel_start = previous_power_of_two (kernel_accel_max / 8);
+
+        if ((kernel_accel_start >= kernel_accel_min) && (kernel_accel_start <= kernel_accel_max))
+        {
+          kernel_accel = kernel_accel_start;
+        }
+      }
+    }
 
     if (kernel_threads_min < kernel_threads_max)
     {
@@ -348,24 +364,42 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
       }
     }
 
-    if (hashes && hashes->st_salts_buf)
+    if (hashconfig->attack_exec == ATTACK_EXEC_OUTSIDE_KERNEL)
     {
-      u32 start = kernel_loops_max;
-
-      const u32 salt_iter = hashes->st_salts_buf->salt_iter;
-
-      if (salt_iter)
+      if (hashes && hashes->salts_buf)
       {
-        start = MIN (start, smallest_repeat_double (hashes->st_salts_buf->salt_iter));
-        start = MIN (start, smallest_repeat_double (hashes->st_salts_buf->salt_iter + 1));
+        u32 start = kernel_loops_max;
 
-        if (((hashes->st_salts_buf->salt_iter + 0) % 125) == 0) start = MIN (start, 125);
-        if (((hashes->st_salts_buf->salt_iter + 1) % 125) == 0) start = MIN (start, 125);
+        const u32 salt_iter = hashes->salts_buf->salt_iter; // we use the first salt as reference
 
-        if ((start >= kernel_loops_min) && (start <= kernel_loops_max))
+        if (salt_iter)
         {
-          kernel_loops = start;
+          start = MIN (start, smallest_repeat_double (hashes->salts_buf->salt_iter));
+          start = MIN (start, smallest_repeat_double (hashes->salts_buf->salt_iter + 1));
+
+          if (((hashes->salts_buf->salt_iter + 0) % 125) == 0) start = MIN (start, 125);
+          if (((hashes->salts_buf->salt_iter + 1) % 125) == 0) start = MIN (start, 125);
+
+          if ((start >= kernel_loops_min) && (start <= kernel_loops_max))
+          {
+            kernel_loops = start;
+          }
         }
+        else
+        {
+          // how can there be a slow hash with no iterations?
+        }
+      }
+    }
+    else
+    {
+      // let's also do some minimal loops, this is only to improve early meassurements taken with try_run()
+
+      const u32 kernel_loops_start = previous_power_of_two (kernel_loops_max / 4);
+
+      if ((kernel_loops_start >= kernel_loops_min) && (kernel_loops_start <= kernel_loops_max))
+      {
+        kernel_loops = kernel_loops_start;
       }
     }
 
@@ -396,30 +430,45 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
     double exec_msec_init = try_run_times (hashcat_ctx, device_param, kernel_accel, kernel_loops, kernel_threads, 2);
 
     float threads_eff_best = exec_msec_init / kernel_threads;
+    u32   threads_cnt_best = kernel_threads;
+
+    float threads_eff_prev = 0;
+    u32   threads_cnt_prev = 0;
 
     for (u32 kernel_threads_test = kernel_threads; kernel_threads_test <= kernel_threads_max; kernel_threads_test = (kernel_threads_test < device_param->kernel_preferred_wgs_multiple) ? kernel_threads_test << 1 : kernel_threads_test + device_param->kernel_preferred_wgs_multiple)
     {
       double exec_msec = try_run_times (hashcat_ctx, device_param, kernel_accel, kernel_loops, kernel_threads_test, 2);
 
+      //printf ("thread %f %u %u %u\n", exec_msec, kernel_accel, kernel_loops, kernel_threads_test);
       if (exec_msec > target_msec) break;
 
       if (kernel_threads >= 32)
       {
         // we want a little room for accel to play with so not full target_msec
 
-        if (exec_msec > target_msec / 8) break;
+        if (exec_msec > target_msec / 4) break;
       }
 
-      float threads_eff_cur = exec_msec / kernel_threads_test;
+      kernel_threads = kernel_threads_test;
 
-      if ((threads_eff_cur * 1.05) < threads_eff_best)
+      threads_eff_prev = exec_msec / kernel_threads_test;
+      threads_cnt_prev = kernel_threads_test;
+
+      //printf ("%f\n", threads_eff_prev);
+
+      if (threads_eff_prev < threads_eff_best)
       {
-        threads_eff_best = threads_eff_cur;
-
-        kernel_threads = kernel_threads_test;
+        threads_eff_best = threads_eff_prev;
+        threads_cnt_best = threads_cnt_prev;
       }
     }
 
+    // now we decide to choose either maximum or in some extreme cases prefer more efficient ones
+    if ((threads_eff_best * 1.06) < threads_eff_prev)
+    {
+      kernel_threads = threads_cnt_best;
+    }
+
     #define STEPS_CNT 12
 
     // now we tune for kernel-accel but with the new kernel-loops from previous loop set
diff --git a/src/backend.c b/src/backend.c
index 789dcd7df..6e8fe4aad 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -10217,10 +10217,10 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
           {
             const u64 device_available_mem_sav = device_param->device_available_mem;
 
-            const u64 device_available_mem_new = device_available_mem_sav - (device_available_mem_sav * 0.2);
+            const u64 device_available_mem_new = device_available_mem_sav - (device_available_mem_sav * 0.34);
 
             event_log_warning (hashcat_ctx, "* Device #%u: This system does not offer any reliable method to query actual free memory. Estimated base: %" PRIu64, device_id + 1, device_available_mem_sav);
-            event_log_warning (hashcat_ctx, "             Assuming normal desktop activity, reducing estimate by 20%%: %" PRIu64, device_available_mem_new);
+            event_log_warning (hashcat_ctx, "             Assuming normal desktop activity, reducing estimate by 34%%: %" PRIu64, device_available_mem_new);
             event_log_warning (hashcat_ctx, "             This can hurt performance drastically, especially on memory-heavy algorithms.");
             event_log_warning (hashcat_ctx, "             You can adjust this percentage using --backend-devices-keepfree");
             event_log_warning (hashcat_ctx, NULL);
@@ -16275,12 +16275,23 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
     {
       const u64 GiB4 = 4ULL * 1024 * 1024 * 1024;
 
-      event_log_warning (hashcat_ctx, "Couldn't query the OS for free memory, assuming 4GiB");
+      event_log_warning (hashcat_ctx, "Couldn't query the OS for free memory, assuming 4GiB is available per compute device");
 
       accel_limit_host = GiB4;
     }
     else
     {
+      if (user_options->backend_devices_keepfree)
+      {
+        accel_limit_host = ((u64) accel_limit_host * (100 - user_options->backend_devices_keepfree)) / 100;
+      }
+      else
+      {
+        accel_limit_host = accel_limit_host - (accel_limit_host * 0.34);
+      }
+
+      accel_limit_host /= backend_ctx->backend_devices_active;
+
       // even tho let's not be greedy
 
       const u64 GiB8 = 8ULL * 1024 * 1024 * 1024;
diff --git a/src/main.c b/src/main.c
index 994b0bc9f..69d89e81b 100644
--- a/src/main.c
+++ b/src/main.c
@@ -643,7 +643,17 @@ static void main_backend_session_hostmem (MAYBE_UNUSED hashcat_ctx_t *hashcat_ct
 
   const u64 *hostmem = (const u64 *) buf;
 
-  event_log_info (hashcat_ctx, "Host memory required for this attack: %" PRIu64 " MB", *hostmem / (1024 * 1024));
+  u64 free_memory = 0;
+
+  if (get_free_memory (&free_memory) == false)
+  {
+    event_log_info (hashcat_ctx, "Host memory allocated for this attack: %" PRIu64 " MB", *hostmem / (1024 * 1024));
+  }
+  else
+  {
+    event_log_info (hashcat_ctx, "Host memory allocated for this attack: %" PRIu64 " MB (%" PRIu64 " MB free)", *hostmem / (1024 * 1024), free_memory / (1024 * 1024));
+  }
+
   event_log_info (hashcat_ctx, NULL);
 }
 

From f663abee4426cba0cc80ea0ce9adf5218fae85c9 Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Sun, 6 Jul 2025 21:28:37 +0200
Subject: [PATCH 73/83] Added workaround to get rid of internal runtimes memory
 leaks

From now on, hashcat no longer creates and destroys the context and command queue of every enabled device each time it switches from one hash-mode to the next; this matters most in benchmark mode.
Specifically, when using OpenCL with an NVIDIA device, it was not possible to complete the benchmark: clCreateContext leaks memory, which slowly consumed all available GPU memory until hashcat could no longer create a new context and disabled the device.

Avoid deprecated HIP functions

All hipCtx* functions have been declared deprecated, so they have been replaced with their current equivalents; this also fixes a critical bug in the handling of multiple AMD devices in the same system.
---
 docs/changes.txt  |   2 +
 include/ext_hip.h | 976 +++++++++++++++++++++++-----------------------
 src/autotune.c    |   7 +-
 src/backend.c     | 370 +++++++-----------
 src/dispatch.c    |  14 +-
 src/ext_hip.c     | 210 +++++++---
 src/selftest.c    |   4 +-
 7 files changed, 793 insertions(+), 790 deletions(-)

diff --git a/docs/changes.txt b/docs/changes.txt
index de2d26bcd..c2af93956 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -129,6 +129,7 @@
 - Alias Devices: Prevents hashcat, when started with x86_64 emulation on Apple Silicon, from showing the Apple M1 OpenCL CPU as an alias for the Apple M1 Metal GPU
 - Apple Driver: Automatically enable GPU support on Apple OpenCL instead of CPU support
 - Apple Driver: Updated requirements to use Apple OpenCL API to macOS 13.0 - use
+- Backend: Added workaround to get rid of internal runtimes memory leaks
 - Backend: Updated filename chksum format to prevent invalid cache on Apple Silicon when switching arch
 - Backend: Splitting backend_ctx_devices_init into smaller runtime-specific functions
 - Backend Checks: Describe workaround in error message when detecting more than 64 backend devices
@@ -141,6 +142,7 @@
 - Building: Support building windows binaries on macOS using MinGW
 - Dependencies: Updated OpenCL-Headers to v2024.10.24 (commit 265df85)
 - Documents: Updated BUILD.md and added BUILD_macOS.md (containing instructions for building windows binaries on macOS)
+- HIP Backend: Avoid deprecated functions
 - Modules: Added support for non-zero IVs for -m 6800 (Lastpass). Also added `tools/lastpass2hashcat.py`
 - Modules: Updated module_unstable_warning
 - Open Document Format: Added support for small documents with content length < 1024
diff --git a/include/ext_hip.h b/include/ext_hip.h
index d0f53d173..3c0b8433a 100644
--- a/include/ext_hip.h
+++ b/include/ext_hip.h
@@ -12,20 +12,22 @@
 
 // start: driver_types.h
 
-typedef void* hipDeviceptr_t;
+typedef void *hipDeviceptr_t;
+
+typedef enum hipFunction_attribute
+{
+  HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK,            // The maximum number of threads per block. Depends on function and device.
+  HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES,                // The statically allocated shared memory size in bytes per block required by the function.
+  HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES,                 // The user-allocated constant memory by the function in bytes.
+  HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES,                 // The local memory usage of each thread by this function in bytes.
+  HIP_FUNC_ATTRIBUTE_NUM_REGS,                         // The number of registers used by each thread of this function.
+  HIP_FUNC_ATTRIBUTE_PTX_VERSION,                      // PTX version
+  HIP_FUNC_ATTRIBUTE_BINARY_VERSION,                   // Binary version
+  HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA,                    // Cache mode
+  HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES,    // The maximum dynamic shared memory per block for this function in bytes.
+  HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT, // The shared memory carveout preference in percent of the maximum shared memory.
+  HIP_FUNC_ATTRIBUTE_MAX
 
-typedef enum hipFunction_attribute {
-    HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK,            ///< The maximum number of threads per block. Depends on function and device.
-    HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES,                ///< The statically allocated shared memory size in bytes per block required by the function.
-    HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES,                 ///< The user-allocated constant memory by the function in bytes.
-    HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES,                 ///< The local memory usage of each thread by this function in bytes.
-    HIP_FUNC_ATTRIBUTE_NUM_REGS,                         ///< The number of registers used by each thread of this function.
-    HIP_FUNC_ATTRIBUTE_PTX_VERSION,                      ///< PTX version
-    HIP_FUNC_ATTRIBUTE_BINARY_VERSION,                   ///< Binary version
-    HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA,                    ///< Cache mode
-    HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES,    ///< The maximum dynamic shared memory per block for this function in bytes.
-    HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT, ///< The shared memory carveout preference in percent of the maximum shared memory.
-    HIP_FUNC_ATTRIBUTE_MAX
 } hipFunction_attribute;
 
 // stop: driver_types.h
@@ -47,120 +49,102 @@ typedef struct ihipModuleSymbol_t* hipFunction_t;
 #define __HIP_NODISCARD
 #endif
 
-typedef enum __HIP_NODISCARD hipError_t {
-    hipSuccess = 0,  ///< Successful completion.
-    hipErrorInvalidValue = 1,  ///< One or more of the parameters passed to the API call is NULL
-                               ///< or not in an acceptable range.
-    hipErrorOutOfMemory = 2,
-    // Deprecated
-    hipErrorMemoryAllocation = 2,  ///< Memory allocation error.
-    hipErrorNotInitialized = 3,
-    // Deprecated
-    hipErrorInitializationError = 3,
-    hipErrorDeinitialized = 4,
-    hipErrorProfilerDisabled = 5,
-    hipErrorProfilerNotInitialized = 6,
-    hipErrorProfilerAlreadyStarted = 7,
-    hipErrorProfilerAlreadyStopped = 8,
-    hipErrorInvalidConfiguration = 9,
-    hipErrorInvalidPitchValue = 12,
-    hipErrorInvalidSymbol = 13,
-    hipErrorInvalidDevicePointer = 17,  ///< Invalid Device Pointer
-    hipErrorInvalidMemcpyDirection = 21,  ///< Invalid memory copy direction
-    hipErrorInsufficientDriver = 35,
-    hipErrorMissingConfiguration = 52,
-    hipErrorPriorLaunchFailure = 53,
-    hipErrorInvalidDeviceFunction = 98,
-    hipErrorNoDevice = 100,  ///< Call to hipGetDeviceCount returned 0 devices
-    hipErrorInvalidDevice = 101,  ///< DeviceID must be in range 0...#compute-devices.
-    hipErrorInvalidImage = 200,
-    hipErrorInvalidContext = 201,  ///< Produced when input context is invalid.
-    hipErrorContextAlreadyCurrent = 202,
-    hipErrorMapFailed = 205,
-    // Deprecated
-    hipErrorMapBufferObjectFailed = 205,  ///< Produced when the IPC memory attach failed from ROCr.
-    hipErrorUnmapFailed = 206,
-    hipErrorArrayIsMapped = 207,
-    hipErrorAlreadyMapped = 208,
-    hipErrorNoBinaryForGpu = 209,
-    hipErrorAlreadyAcquired = 210,
-    hipErrorNotMapped = 211,
-    hipErrorNotMappedAsArray = 212,
-    hipErrorNotMappedAsPointer = 213,
-    hipErrorECCNotCorrectable = 214,
-    hipErrorUnsupportedLimit = 215,
-    hipErrorContextAlreadyInUse = 216,
-    hipErrorPeerAccessUnsupported = 217,
-    hipErrorInvalidKernelFile = 218,  ///< In CUDA DRV, it is CUDA_ERROR_INVALID_PTX
-    hipErrorInvalidGraphicsContext = 219,
-    hipErrorInvalidSource = 300,
-    hipErrorFileNotFound = 301,
-    hipErrorSharedObjectSymbolNotFound = 302,
-    hipErrorSharedObjectInitFailed = 303,
-    hipErrorOperatingSystem = 304,
-    hipErrorInvalidHandle = 400,
-    // Deprecated
-    hipErrorInvalidResourceHandle = 400,  ///< Resource handle (hipEvent_t or hipStream_t) invalid.
-    hipErrorIllegalState = 401, ///< Resource required is not in a valid state to perform operation.
-    hipErrorNotFound = 500,
-    hipErrorNotReady = 600,  ///< Indicates that asynchronous operations enqueued earlier are not
-                             ///< ready.  This is not actually an error, but is used to distinguish
-                             ///< from hipSuccess (which indicates completion).  APIs that return
-                             ///< this error include hipEventQuery and hipStreamQuery.
-    hipErrorIllegalAddress = 700,
-    hipErrorLaunchOutOfResources = 701,  ///< Out of resources error.
-    hipErrorLaunchTimeOut = 702,
-    hipErrorPeerAccessAlreadyEnabled =
-        704,  ///< Peer access was already enabled from the current device.
-    hipErrorPeerAccessNotEnabled =
-        705,  ///< Peer access was never enabled from the current device.
-    hipErrorSetOnActiveProcess = 708,
-    hipErrorContextIsDestroyed = 709,
-    hipErrorAssert = 710,  ///< Produced when the kernel calls assert.
-    hipErrorHostMemoryAlreadyRegistered =
-        712,  ///< Produced when trying to lock a page-locked memory.
-    hipErrorHostMemoryNotRegistered =
-        713,  ///< Produced when trying to unlock a non-page-locked memory.
-    hipErrorLaunchFailure =
-        719,  ///< An exception occurred on the device while executing a kernel.
-    hipErrorCooperativeLaunchTooLarge =
-        720,  ///< This error indicates that the number of blocks launched per grid for a kernel
-              ///< that was launched via cooperative launch APIs exceeds the maximum number of
-              ///< allowed blocks for the current device
-    hipErrorNotSupported = 801,  ///< Produced when the hip API is not supported/implemented
-    hipErrorStreamCaptureUnsupported = 900,  ///< The operation is not permitted when the stream
-                                             ///< is capturing.
-    hipErrorStreamCaptureInvalidated = 901,  ///< The current capture sequence on the stream
-                                             ///< has been invalidated due to a previous error.
-    hipErrorStreamCaptureMerge = 902,  ///< The operation would have resulted in a merge of
-                                       ///< two independent capture sequences.
-    hipErrorStreamCaptureUnmatched = 903,  ///< The capture was not initiated in this stream.
-    hipErrorStreamCaptureUnjoined = 904,  ///< The capture sequence contains a fork that was not
-                                          ///< joined to the primary stream.
-    hipErrorStreamCaptureIsolation = 905,  ///< A dependency would have been created which crosses
-                                           ///< the capture sequence boundary. Only implicit
-                                           ///< in-stream ordering dependencies  are allowed
-                                           ///< to cross the boundary
-    hipErrorStreamCaptureImplicit = 906,  ///< The operation would have resulted in a disallowed
-                                          ///< implicit dependency on a current capture sequence
-                                          ///< from hipStreamLegacy.
-    hipErrorCapturedEvent = 907,  ///< The operation is not permitted on an event which was last
-                                  ///< recorded in a capturing stream.
-    hipErrorStreamCaptureWrongThread = 908,  ///< A stream capture sequence not initiated with
-                                             ///< the hipStreamCaptureModeRelaxed argument to
-                                             ///< hipStreamBeginCapture was passed to
-                                             ///< hipStreamEndCapture in a different thread.
-    hipErrorGraphExecUpdateFailure = 910,  ///< This error indicates that the graph update
-                                           ///< not performed because it included changes which
-                                           ///< violated constraints specific to instantiated graph
-                                           ///< update.
-    hipErrorUnknown = 999,  //< Unknown error.
-    // HSA Runtime Error Codes start here.
-    hipErrorRuntimeMemory = 1052,  ///< HSA runtime memory call returned error.  Typically not seen
-                                   ///< in production systems.
-    hipErrorRuntimeOther = 1053,  ///< HSA runtime call other than memory returned error.  Typically
-                                  ///< not seen in production systems.
-    hipErrorTbd  ///< Marker that more error codes are needed.
+typedef enum __HIP_NODISCARD hipError_t
+{
+  hipSuccess = 0,                            // Successful completion.
+  hipErrorInvalidValue = 1,                  // One or more of the parameters passed to the API call is NULL
+                                             // or not in an acceptable range.
+  hipErrorOutOfMemory = 2,
+  // Deprecated
+  hipErrorMemoryAllocation = 2,              // Memory allocation error.
+  hipErrorNotInitialized = 3,
+  // Deprecated
+  hipErrorInitializationError = 3,
+  hipErrorDeinitialized = 4,
+  hipErrorProfilerDisabled = 5,
+  hipErrorProfilerNotInitialized = 6,
+  hipErrorProfilerAlreadyStarted = 7,
+  hipErrorProfilerAlreadyStopped = 8,
+  hipErrorInvalidConfiguration = 9,
+  hipErrorInvalidPitchValue = 12,
+  hipErrorInvalidSymbol = 13,
+  hipErrorInvalidDevicePointer = 17,         // Invalid Device Pointer
+  hipErrorInvalidMemcpyDirection = 21,       // Invalid memory copy direction
+  hipErrorInsufficientDriver = 35,
+  hipErrorMissingConfiguration = 52,
+  hipErrorPriorLaunchFailure = 53,
+  hipErrorInvalidDeviceFunction = 98,
+  hipErrorNoDevice = 100,                    // Call to hipGetDeviceCount returned 0 devices
+  hipErrorInvalidDevice = 101,               // DeviceID must be in range 0...#compute-devices.
+  hipErrorInvalidImage = 200,
+  hipErrorInvalidContext = 201,              // Produced when input context is invalid.
+  hipErrorContextAlreadyCurrent = 202,
+  hipErrorMapFailed = 205,
+  // Deprecated
+  hipErrorMapBufferObjectFailed = 205,       // Produced when the IPC memory attach failed from ROCr.
+  hipErrorUnmapFailed = 206,
+  hipErrorArrayIsMapped = 207,
+  hipErrorAlreadyMapped = 208,
+  hipErrorNoBinaryForGpu = 209,
+  hipErrorAlreadyAcquired = 210,
+  hipErrorNotMapped = 211,
+  hipErrorNotMappedAsArray = 212,
+  hipErrorNotMappedAsPointer = 213,
+  hipErrorECCNotCorrectable = 214,
+  hipErrorUnsupportedLimit = 215,
+  hipErrorContextAlreadyInUse = 216,
+  hipErrorPeerAccessUnsupported = 217,
+  hipErrorInvalidKernelFile = 218,           // In CUDA DRV, it is CUDA_ERROR_INVALID_PTX
+  hipErrorInvalidGraphicsContext = 219,
+  hipErrorInvalidSource = 300,
+  hipErrorFileNotFound = 301,
+  hipErrorSharedObjectSymbolNotFound = 302,
+  hipErrorSharedObjectInitFailed = 303,
+  hipErrorOperatingSystem = 304,
+  hipErrorInvalidHandle = 400,
+  // Deprecated
+  hipErrorInvalidResourceHandle = 400,       // Resource handle (hipEvent_t or hipStream_t) invalid.
+  hipErrorIllegalState = 401,                // Resource required is not in a valid state to perform operation.
+  hipErrorNotFound = 500,
+  hipErrorNotReady = 600,                    // Indicates that asynchronous operations enqueued earlier are not ready.
+                                             // This is not actually an error, but is used to distinguish from hipSuccess (which indicates completion).
+                                             // APIs that return this error include hipEventQuery and hipStreamQuery.
+  hipErrorIllegalAddress = 700,
+  hipErrorLaunchOutOfResources = 701,        // Out of resources error.
+  hipErrorLaunchTimeOut = 702,
+  hipErrorPeerAccessAlreadyEnabled = 704,    // Peer access was already enabled from the current device.
+  hipErrorPeerAccessNotEnabled = 705,        // Peer access was never enabled from the current device.
+  hipErrorSetOnActiveProcess = 708,
+  hipErrorContextIsDestroyed = 709,
+  hipErrorAssert = 710,                      // Produced when the kernel calls assert.
+  hipErrorHostMemoryAlreadyRegistered = 712, // Produced when trying to lock a page-locked memory.
+  hipErrorHostMemoryNotRegistered = 713,     // Produced when trying to unlock a non-page-locked memory.
+  hipErrorLaunchFailure = 719,               // An exception occurred on the device while executing a kernel.
+  hipErrorCooperativeLaunchTooLarge = 720,   // This error indicates that the number of blocks launched per grid for a kernel
+                                             // that was launched via cooperative launch APIs exceeds the maximum number of
+                                             // allowed blocks for the current device
+  hipErrorNotSupported = 801,                // Produced when the hip API is not supported/implemented
+  hipErrorStreamCaptureUnsupported = 900,    // The operation is not permitted when the stream is capturing.
+  hipErrorStreamCaptureInvalidated = 901,    // The current capture sequence on the stream
+                                             // has been invalidated due to a previous error.
+  hipErrorStreamCaptureMerge = 902,          // The operation would have resulted in a merge of two independent capture sequences.
+  hipErrorStreamCaptureUnmatched = 903,      // The capture was not initiated in this stream.
+  hipErrorStreamCaptureUnjoined = 904,       // The capture sequence contains a fork that was not joined to the primary stream.
+  hipErrorStreamCaptureIsolation = 905,      // A dependency would have been created which crosses the capture sequence boundary.
+                                             // Only implicit in-stream ordering dependencies are allowed to cross the boundary
+  hipErrorStreamCaptureImplicit = 906,       // The operation would have resulted in a disallowed implicit dependency on a current
+                                             // capture sequence from hipStreamLegacy.
+  hipErrorCapturedEvent = 907,               // The operation is not permitted on an event which was last recorded in a capturing stream.
+  hipErrorStreamCaptureWrongThread = 908,    // A stream capture sequence not initiated with the hipStreamCaptureModeRelaxed argument to
+                                             // hipStreamBeginCapture was passed to hipStreamEndCapture in a different thread.
+  hipErrorGraphExecUpdateFailure = 910,      // This error indicates that the graph update not performed because it included changes which
+                                             // violated constraints specific to instantiated graph update.
+  hipErrorUnknown = 999,                     // Unknown error.
+  // HSA Runtime Error Codes start here.
+  hipErrorRuntimeMemory = 1052,              // HSA runtime memory call returned error. Typically not seen in production systems.
+  hipErrorRuntimeOther = 1053,               // HSA runtime call other than memory returned error. Typically not seen in production systems.
+  hipErrorTbd                                // Marker that more error codes are needed.
+
 } hipError_t;
 
 #undef __HIP_NODISCARD
@@ -170,178 +154,178 @@ typedef enum __HIP_NODISCARD hipError_t {
  * hipDeviceAttribute_t
  * hipDeviceAttributeUnused number: 5
  */
-typedef enum hipDeviceAttribute_t {
-    hipDeviceAttributeCudaCompatibleBegin = 0,
+typedef enum hipDeviceAttribute_t
+{
+  hipDeviceAttributeCudaCompatibleBegin = 0,
 
-    hipDeviceAttributeEccEnabled = hipDeviceAttributeCudaCompatibleBegin, ///< Whether ECC support is enabled.
-    hipDeviceAttributeAccessPolicyMaxWindowSize,        ///< Cuda only. The maximum size of the window policy in bytes.
-    hipDeviceAttributeAsyncEngineCount,                 ///< Asynchronous engines number.
-    hipDeviceAttributeCanMapHostMemory,                 ///< Whether host memory can be mapped into device address space
-    hipDeviceAttributeCanUseHostPointerForRegisteredMem,///< Device can access host registered memory
-                                                        ///< at the same virtual address as the CPU
-    hipDeviceAttributeClockRate,                        ///< Peak clock frequency in kilohertz.
-    hipDeviceAttributeComputeMode,                      ///< Compute mode that device is currently in.
-    hipDeviceAttributeComputePreemptionSupported,       ///< Device supports Compute Preemption.
-    hipDeviceAttributeConcurrentKernels,                ///< Device can possibly execute multiple kernels concurrently.
-    hipDeviceAttributeConcurrentManagedAccess,          ///< Device can coherently access managed memory concurrently with the CPU
-    hipDeviceAttributeCooperativeLaunch,                ///< Support cooperative launch
-    hipDeviceAttributeCooperativeMultiDeviceLaunch,     ///< Support cooperative launch on multiple devices
-    hipDeviceAttributeDeviceOverlap,                    ///< Device can concurrently copy memory and execute a kernel.
-                                                        ///< Deprecated. Use instead asyncEngineCount.
-    hipDeviceAttributeDirectManagedMemAccessFromHost,   ///< Host can directly access managed memory on
-                                                        ///< the device without migration
-    hipDeviceAttributeGlobalL1CacheSupported,           ///< Device supports caching globals in L1
-    hipDeviceAttributeHostNativeAtomicSupported,        ///< Link between the device and the host supports native atomic operations
-    hipDeviceAttributeIntegrated,                       ///< Device is integrated GPU
-    hipDeviceAttributeIsMultiGpuBoard,                  ///< Multiple GPU devices.
-    hipDeviceAttributeKernelExecTimeout,                ///< Run time limit for kernels executed on the device
-    hipDeviceAttributeL2CacheSize,                      ///< Size of L2 cache in bytes. 0 if the device doesn't have L2 cache.
-    hipDeviceAttributeLocalL1CacheSupported,            ///< caching locals in L1 is supported
-    hipDeviceAttributeLuid,                             ///< 8-byte locally unique identifier in 8 bytes. Undefined on TCC and non-Windows platforms
-    hipDeviceAttributeLuidDeviceNodeMask,               ///< Luid device node mask. Undefined on TCC and non-Windows platforms
-    hipDeviceAttributeComputeCapabilityMajor,           ///< Major compute capability version number.
-    hipDeviceAttributeManagedMemory,                    ///< Device supports allocating managed memory on this system
-    hipDeviceAttributeMaxBlocksPerMultiProcessor,       ///< Max block size per multiprocessor
-    hipDeviceAttributeMaxBlockDimX,                     ///< Max block size in width.
-    hipDeviceAttributeMaxBlockDimY,                     ///< Max block size in height.
-    hipDeviceAttributeMaxBlockDimZ,                     ///< Max block size in depth.
-    hipDeviceAttributeMaxGridDimX,                      ///< Max grid size  in width.
-    hipDeviceAttributeMaxGridDimY,                      ///< Max grid size  in height.
-    hipDeviceAttributeMaxGridDimZ,                      ///< Max grid size  in depth.
-    hipDeviceAttributeMaxSurface1D,                     ///< Maximum size of 1D surface.
-    hipDeviceAttributeMaxSurface1DLayered,              ///< Cuda only. Maximum dimensions of 1D layered surface.
-    hipDeviceAttributeMaxSurface2D,                     ///< Maximum dimension (width, height) of 2D surface.
-    hipDeviceAttributeMaxSurface2DLayered,              ///< Cuda only. Maximum dimensions of 2D layered surface.
-    hipDeviceAttributeMaxSurface3D,                     ///< Maximum dimension (width, height, depth) of 3D surface.
-    hipDeviceAttributeMaxSurfaceCubemap,                ///< Cuda only. Maximum dimensions of Cubemap surface.
-    hipDeviceAttributeMaxSurfaceCubemapLayered,         ///< Cuda only. Maximum dimension of Cubemap layered surface.
-    hipDeviceAttributeMaxTexture1DWidth,                ///< Maximum size of 1D texture.
-    hipDeviceAttributeMaxTexture1DLayered,              ///< Maximum dimensions of 1D layered texture.
-    hipDeviceAttributeMaxTexture1DLinear,               ///< Maximum number of elements allocatable in a 1D linear texture.
-                                                        ///< Use cudaDeviceGetTexture1DLinearMaxWidth() instead on Cuda.
-    hipDeviceAttributeMaxTexture1DMipmap,               ///< Maximum size of 1D mipmapped texture.
-    hipDeviceAttributeMaxTexture2DWidth,                ///< Maximum dimension width of 2D texture.
-    hipDeviceAttributeMaxTexture2DHeight,               ///< Maximum dimension hight of 2D texture.
-    hipDeviceAttributeMaxTexture2DGather,               ///< Maximum dimensions of 2D texture if gather operations  performed.
-    hipDeviceAttributeMaxTexture2DLayered,              ///< Maximum dimensions of 2D layered texture.
-    hipDeviceAttributeMaxTexture2DLinear,               ///< Maximum dimensions (width, height, pitch) of 2D textures bound to pitched memory.
-    hipDeviceAttributeMaxTexture2DMipmap,               ///< Maximum dimensions of 2D mipmapped texture.
-    hipDeviceAttributeMaxTexture3DWidth,                ///< Maximum dimension width of 3D texture.
-    hipDeviceAttributeMaxTexture3DHeight,               ///< Maximum dimension height of 3D texture.
-    hipDeviceAttributeMaxTexture3DDepth,                ///< Maximum dimension depth of 3D texture.
-    hipDeviceAttributeMaxTexture3DAlt,                  ///< Maximum dimensions of alternate 3D texture.
-    hipDeviceAttributeMaxTextureCubemap,                ///< Maximum dimensions of Cubemap texture
-    hipDeviceAttributeMaxTextureCubemapLayered,         ///< Maximum dimensions of Cubemap layered texture.
-    hipDeviceAttributeMaxThreadsDim,                    ///< Maximum dimension of a block
-    hipDeviceAttributeMaxThreadsPerBlock,               ///< Maximum number of threads per block.
-    hipDeviceAttributeMaxThreadsPerMultiProcessor,      ///< Maximum resident threads per multiprocessor.
-    hipDeviceAttributeMaxPitch,                         ///< Maximum pitch in bytes allowed by memory copies
-    hipDeviceAttributeMemoryBusWidth,                   ///< Global memory bus width in bits.
-    hipDeviceAttributeMemoryClockRate,                  ///< Peak memory clock frequency in kilohertz.
-    hipDeviceAttributeComputeCapabilityMinor,           ///< Minor compute capability version number.
-    hipDeviceAttributeMultiGpuBoardGroupID,             ///< Unique ID of device group on the same multi-GPU board
-    hipDeviceAttributeMultiprocessorCount,              ///< Number of multiprocessors on the device.
-    hipDeviceAttributeUnused1,                          ///< Previously hipDeviceAttributeName
-    hipDeviceAttributePageableMemoryAccess,             ///< Device supports coherently accessing pageable memory
-                                                        ///< without calling hipHostRegister on it
-    hipDeviceAttributePageableMemoryAccessUsesHostPageTables, ///< Device accesses pageable memory via the host's page tables
-    hipDeviceAttributePciBusId,                         ///< PCI Bus ID.
-    hipDeviceAttributePciDeviceId,                      ///< PCI Device ID.
-    hipDeviceAttributePciDomainID,                      ///< PCI Domain ID.
-    hipDeviceAttributePersistingL2CacheMaxSize,         ///< Maximum l2 persisting lines capacity in bytes
-    hipDeviceAttributeMaxRegistersPerBlock,             ///< 32-bit registers available to a thread block. This number is shared
-                                                        ///< by all thread blocks simultaneously resident on a multiprocessor.
-    hipDeviceAttributeMaxRegistersPerMultiprocessor,    ///< 32-bit registers available per block.
-    hipDeviceAttributeReservedSharedMemPerBlock,        ///< Shared memory reserved by CUDA driver per block.
-    hipDeviceAttributeMaxSharedMemoryPerBlock,          ///< Maximum shared memory available per block in bytes.
-    hipDeviceAttributeSharedMemPerBlockOptin,           ///< Maximum shared memory per block usable by special opt in.
-    hipDeviceAttributeSharedMemPerMultiprocessor,       ///< Shared memory available per multiprocessor.
-    hipDeviceAttributeSingleToDoublePrecisionPerfRatio, ///< Cuda only. Performance ratio of single precision to double precision.
-    hipDeviceAttributeStreamPrioritiesSupported,        ///< Whether to support stream priorities.
-    hipDeviceAttributeSurfaceAlignment,                 ///< Alignment requirement for surfaces
-    hipDeviceAttributeTccDriver,                        ///< Cuda only. Whether device is a Tesla device using TCC driver
-    hipDeviceAttributeTextureAlignment,                 ///< Alignment requirement for textures
-    hipDeviceAttributeTexturePitchAlignment,            ///< Pitch alignment requirement for 2D texture references bound to pitched memory;
-    hipDeviceAttributeTotalConstantMemory,              ///< Constant memory size in bytes.
-    hipDeviceAttributeTotalGlobalMem,                   ///< Global memory available on devicice.
-    hipDeviceAttributeUnifiedAddressing,                ///< Cuda only. An unified address space shared with the host.
-    hipDeviceAttributeUnused2,                          ///< Previously hipDeviceAttributeUuid
-    hipDeviceAttributeWarpSize,                         ///< Warp size in threads.
-    hipDeviceAttributeMemoryPoolsSupported,             ///< Device supports HIP Stream Ordered Memory Allocator
-    hipDeviceAttributeVirtualMemoryManagementSupported, ///< Device supports HIP virtual memory management
-    hipDeviceAttributeHostRegisterSupported,            ///< Can device support host memory registration via hipHostRegister
-    hipDeviceAttributeMemoryPoolSupportedHandleTypes,   ///< Supported handle mask for HIP Stream Ordered Memory Allocator
+  hipDeviceAttributeEccEnabled = hipDeviceAttributeCudaCompatibleBegin, // Whether ECC support is enabled.
+  hipDeviceAttributeAccessPolicyMaxWindowSize,                // Cuda only. The maximum size of the window policy in bytes.
+  hipDeviceAttributeAsyncEngineCount,                         // Asynchronous engines number.
+  hipDeviceAttributeCanMapHostMemory,                         // Whether host memory can be mapped into device address space
+  hipDeviceAttributeCanUseHostPointerForRegisteredMem,        // Device can access host registered memory
+                                                              // at the same virtual address as the CPU
+  hipDeviceAttributeClockRate,                                // Peak clock frequency in kilohertz.
+  hipDeviceAttributeComputeMode,                              // Compute mode that device is currently in.
+  hipDeviceAttributeComputePreemptionSupported,               // Device supports Compute Preemption.
+  hipDeviceAttributeConcurrentKernels,                        // Device can possibly execute multiple kernels concurrently.
+  hipDeviceAttributeConcurrentManagedAccess,                  // Device can coherently access managed memory concurrently with the CPU
+  hipDeviceAttributeCooperativeLaunch,                        // Support cooperative launch
+  hipDeviceAttributeCooperativeMultiDeviceLaunch,             // Support cooperative launch on multiple devices
+  hipDeviceAttributeDeviceOverlap,                            // Device can concurrently copy memory and execute a kernel.
+                                                              // Deprecated. Use instead asyncEngineCount.
+  hipDeviceAttributeDirectManagedMemAccessFromHost,           // Host can directly access managed memory on
+                                                              // the device without migration
+  hipDeviceAttributeGlobalL1CacheSupported,                   // Device supports caching globals in L1
+  hipDeviceAttributeHostNativeAtomicSupported,                // Link between the device and the host supports native atomic operations
+  hipDeviceAttributeIntegrated,                               // Device is integrated GPU
+  hipDeviceAttributeIsMultiGpuBoard,                          // Multiple GPU devices.
+  hipDeviceAttributeKernelExecTimeout,                        // Run time limit for kernels executed on the device
+  hipDeviceAttributeL2CacheSize,                              // Size of L2 cache in bytes. 0 if the device doesn't have L2 cache.
+  hipDeviceAttributeLocalL1CacheSupported,                    // caching locals in L1 is supported
+  hipDeviceAttributeLuid,                                     // 8-byte locally unique identifier. Undefined on TCC and non-Windows platforms
+  hipDeviceAttributeLuidDeviceNodeMask,                       // Luid device node mask. Undefined on TCC and non-Windows platforms
+  hipDeviceAttributeComputeCapabilityMajor,                   // Major compute capability version number.
+  hipDeviceAttributeManagedMemory,                            // Device supports allocating managed memory on this system
+  hipDeviceAttributeMaxBlocksPerMultiProcessor,               // Max number of blocks per multiprocessor
+  hipDeviceAttributeMaxBlockDimX,                             // Max block size in width.
+  hipDeviceAttributeMaxBlockDimY,                             // Max block size in height.
+  hipDeviceAttributeMaxBlockDimZ,                             // Max block size in depth.
+  hipDeviceAttributeMaxGridDimX,                              // Max grid size  in width.
+  hipDeviceAttributeMaxGridDimY,                              // Max grid size  in height.
+  hipDeviceAttributeMaxGridDimZ,                              // Max grid size  in depth.
+  hipDeviceAttributeMaxSurface1D,                             // Maximum size of 1D surface.
+  hipDeviceAttributeMaxSurface1DLayered,                      // Cuda only. Maximum dimensions of 1D layered surface.
+  hipDeviceAttributeMaxSurface2D,                             // Maximum dimension (width, height) of 2D surface.
+  hipDeviceAttributeMaxSurface2DLayered,                      // Cuda only. Maximum dimensions of 2D layered surface.
+  hipDeviceAttributeMaxSurface3D,                             // Maximum dimension (width, height, depth) of 3D surface.
+  hipDeviceAttributeMaxSurfaceCubemap,                        // Cuda only. Maximum dimensions of Cubemap surface.
+  hipDeviceAttributeMaxSurfaceCubemapLayered,                 // Cuda only. Maximum dimension of Cubemap layered surface.
+  hipDeviceAttributeMaxTexture1DWidth,                        // Maximum size of 1D texture.
+  hipDeviceAttributeMaxTexture1DLayered,                      // Maximum dimensions of 1D layered texture.
+  hipDeviceAttributeMaxTexture1DLinear,                       // Maximum number of elements allocatable in a 1D linear texture.
+                                                              // Use cudaDeviceGetTexture1DLinearMaxWidth() instead on Cuda.
+  hipDeviceAttributeMaxTexture1DMipmap,                       // Maximum size of 1D mipmapped texture.
+  hipDeviceAttributeMaxTexture2DWidth,                        // Maximum dimension width of 2D texture.
+  hipDeviceAttributeMaxTexture2DHeight,                       // Maximum dimension height of 2D texture.
+  hipDeviceAttributeMaxTexture2DGather,                       // Maximum dimensions of 2D texture if gather operations are performed.
+  hipDeviceAttributeMaxTexture2DLayered,                      // Maximum dimensions of 2D layered texture.
+  hipDeviceAttributeMaxTexture2DLinear,                       // Maximum dimensions (width, height, pitch) of 2D textures bound to pitched memory.
+  hipDeviceAttributeMaxTexture2DMipmap,                       // Maximum dimensions of 2D mipmapped texture.
+  hipDeviceAttributeMaxTexture3DWidth,                        // Maximum dimension width of 3D texture.
+  hipDeviceAttributeMaxTexture3DHeight,                       // Maximum dimension height of 3D texture.
+  hipDeviceAttributeMaxTexture3DDepth,                        // Maximum dimension depth of 3D texture.
+  hipDeviceAttributeMaxTexture3DAlt,                          // Maximum dimensions of alternate 3D texture.
+  hipDeviceAttributeMaxTextureCubemap,                        // Maximum dimensions of Cubemap texture
+  hipDeviceAttributeMaxTextureCubemapLayered,                 // Maximum dimensions of Cubemap layered texture.
+  hipDeviceAttributeMaxThreadsDim,                            // Maximum dimension of a block
+  hipDeviceAttributeMaxThreadsPerBlock,                       // Maximum number of threads per block.
+  hipDeviceAttributeMaxThreadsPerMultiProcessor,              // Maximum resident threads per multiprocessor.
+  hipDeviceAttributeMaxPitch,                                 // Maximum pitch in bytes allowed by memory copies
+  hipDeviceAttributeMemoryBusWidth,                           // Global memory bus width in bits.
+  hipDeviceAttributeMemoryClockRate,                          // Peak memory clock frequency in kilohertz.
+  hipDeviceAttributeComputeCapabilityMinor,                   // Minor compute capability version number.
+  hipDeviceAttributeMultiGpuBoardGroupID,                     // Unique ID of device group on the same multi-GPU board
+  hipDeviceAttributeMultiprocessorCount,                      // Number of multiprocessors on the device.
+  hipDeviceAttributeUnused1,                                  // Previously hipDeviceAttributeName
+  hipDeviceAttributePageableMemoryAccess,                     // Device supports coherently accessing pageable memory
+                                                              // without calling hipHostRegister on it
+  hipDeviceAttributePageableMemoryAccessUsesHostPageTables,   // Device accesses pageable memory via the host's page tables
+  hipDeviceAttributePciBusId,                                 // PCI Bus ID.
+  hipDeviceAttributePciDeviceId,                              // PCI Device ID.
+  hipDeviceAttributePciDomainID,                              // PCI Domain ID.
+  hipDeviceAttributePersistingL2CacheMaxSize,                 // Maximum l2 persisting lines capacity in bytes
+  hipDeviceAttributeMaxRegistersPerBlock,                     // 32-bit registers available to a thread block. This number is shared
+                                                              // by all thread blocks simultaneously resident on a multiprocessor.
+  hipDeviceAttributeMaxRegistersPerMultiprocessor,            // 32-bit registers available per multiprocessor.
+  hipDeviceAttributeReservedSharedMemPerBlock,                // Shared memory reserved by CUDA driver per block.
+  hipDeviceAttributeMaxSharedMemoryPerBlock,                  // Maximum shared memory available per block in bytes.
+  hipDeviceAttributeSharedMemPerBlockOptin,                   // Maximum shared memory per block usable by special opt in.
+  hipDeviceAttributeSharedMemPerMultiprocessor,               // Shared memory available per multiprocessor.
+  hipDeviceAttributeSingleToDoublePrecisionPerfRatio,         // Cuda only. Performance ratio of single precision to double precision.
+  hipDeviceAttributeStreamPrioritiesSupported,                // Whether to support stream priorities.
+  hipDeviceAttributeSurfaceAlignment,                         // Alignment requirement for surfaces
+  hipDeviceAttributeTccDriver,                                // Cuda only. Whether device is a Tesla device using TCC driver
+  hipDeviceAttributeTextureAlignment,                         // Alignment requirement for textures
+  hipDeviceAttributeTexturePitchAlignment,                    // Pitch alignment requirement for 2D texture references bound to pitched memory;
+  hipDeviceAttributeTotalConstantMemory,                      // Constant memory size in bytes.
+  hipDeviceAttributeTotalGlobalMem,                           // Global memory available on device.
+  hipDeviceAttributeUnifiedAddressing,                        // Cuda only. An unified address space shared with the host.
+  hipDeviceAttributeUnused2,                                  // Previously hipDeviceAttributeUuid
+  hipDeviceAttributeWarpSize,                                 // Warp size in threads.
+  hipDeviceAttributeMemoryPoolsSupported,                     // Device supports HIP Stream Ordered Memory Allocator
+  hipDeviceAttributeVirtualMemoryManagementSupported,         // Device supports HIP virtual memory management
+  hipDeviceAttributeHostRegisterSupported,                    // Can device support host memory registration via hipHostRegister
+  hipDeviceAttributeMemoryPoolSupportedHandleTypes,           // Supported handle mask for HIP Stream Ordered Memory Allocator
 
-    hipDeviceAttributeCudaCompatibleEnd = 9999,
-    hipDeviceAttributeAmdSpecificBegin = 10000,
+  hipDeviceAttributeCudaCompatibleEnd = 9999,
+  hipDeviceAttributeAmdSpecificBegin = 10000,
 
-    hipDeviceAttributeClockInstructionRate = hipDeviceAttributeAmdSpecificBegin,  ///< Frequency in khz of the timer used by the device-side "clock*"
-    hipDeviceAttributeUnused3,                                  ///< Previously hipDeviceAttributeArch
-    hipDeviceAttributeMaxSharedMemoryPerMultiprocessor,         ///< Maximum Shared Memory PerMultiprocessor.
-    hipDeviceAttributeUnused4,                                  ///< Previously hipDeviceAttributeGcnArch
-    hipDeviceAttributeUnused5,                                  ///< Previously hipDeviceAttributeGcnArchName
-    hipDeviceAttributeHdpMemFlushCntl,                          ///< Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register
-    hipDeviceAttributeHdpRegFlushCntl,                          ///< Address of the HDP_REG_COHERENCY_FLUSH_CNTL register
-    hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc,      ///< Supports cooperative launch on multiple
-                                                                ///< devices with unmatched functions
-    hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim,   ///< Supports cooperative launch on multiple
-                                                                ///< devices with unmatched grid dimensions
-    hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim,  ///< Supports cooperative launch on multiple
-                                                                ///< devices with unmatched block dimensions
-    hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem, ///< Supports cooperative launch on multiple
-                                                                ///< devices with unmatched shared memories
-    hipDeviceAttributeIsLargeBar,                               ///< Whether it is LargeBar
-    hipDeviceAttributeAsicRevision,                             ///< Revision of the GPU in this device
-    hipDeviceAttributeCanUseStreamWaitValue,                    ///< '1' if Device supports hipStreamWaitValue32() and
-                                                                ///< hipStreamWaitValue64(), '0' otherwise.
-    hipDeviceAttributeImageSupport,                             ///< '1' if Device supports image, '0' otherwise.
-    hipDeviceAttributePhysicalMultiProcessorCount,              ///< All available physical compute
-                                                                ///< units for the device
-    hipDeviceAttributeFineGrainSupport,                         ///< '1' if Device supports fine grain, '0' otherwise
-    hipDeviceAttributeWallClockRate,                            ///< Constant frequency of wall clock in kilohertz.
+  hipDeviceAttributeClockInstructionRate = hipDeviceAttributeAmdSpecificBegin,  // Frequency in khz of the timer used by the device-side "clock*"
+  hipDeviceAttributeUnused3,                                  // Previously hipDeviceAttributeArch
+  hipDeviceAttributeMaxSharedMemoryPerMultiprocessor,         // Maximum Shared Memory PerMultiprocessor.
+  hipDeviceAttributeUnused4,                                  // Previously hipDeviceAttributeGcnArch
+  hipDeviceAttributeUnused5,                                  // Previously hipDeviceAttributeGcnArchName
+  hipDeviceAttributeHdpMemFlushCntl,                          // Address of the HDP_MEM_COHERENCY_FLUSH_CNTL register
+  hipDeviceAttributeHdpRegFlushCntl,                          // Address of the HDP_REG_COHERENCY_FLUSH_CNTL register
+  hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc,      // Supports cooperative launch on multiple devices with unmatched functions
+  hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim,   // Supports cooperative launch on multiple devices with unmatched grid dimensions
+  hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim,  // Supports cooperative launch on multiple devices with unmatched block dimensions
+  hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem, // Supports cooperative launch on multiple devices with unmatched shared memories
+  hipDeviceAttributeIsLargeBar,                               // Whether it is LargeBar
+  hipDeviceAttributeAsicRevision,                             // Revision of the GPU in this device
+  hipDeviceAttributeCanUseStreamWaitValue,                    // '1' if Device supports hipStreamWaitValue32() and hipStreamWaitValue64(), '0' otherwise.
+  hipDeviceAttributeImageSupport,                             // '1' if Device supports image, '0' otherwise.
+  hipDeviceAttributePhysicalMultiProcessorCount,              // All available physical compute units for the device
+  hipDeviceAttributeFineGrainSupport,                         // '1' if Device supports fine grain, '0' otherwise
+  hipDeviceAttributeWallClockRate,                            // Constant frequency of wall clock in kilohertz.
+
+  hipDeviceAttributeAmdSpecificEnd = 19999,
+  hipDeviceAttributeVendorSpecificBegin = 20000,
+  // Extended attributes for vendors
 
-    hipDeviceAttributeAmdSpecificEnd = 19999,
-    hipDeviceAttributeVendorSpecificBegin = 20000,
-    // Extended attributes for vendors
 } hipDeviceAttribute_t;
 
 /**
  * hipDeviceArch_t
  *
  */
-typedef struct {
-    // 32-bit Atomics
-    unsigned hasGlobalInt32Atomics : 1;     ///< 32-bit integer atomics for global memory.
-    unsigned hasGlobalFloatAtomicExch : 1;  ///< 32-bit float atomic exch for global memory.
-    unsigned hasSharedInt32Atomics : 1;     ///< 32-bit integer atomics for shared memory.
-    unsigned hasSharedFloatAtomicExch : 1;  ///< 32-bit float atomic exch for shared memory.
-    unsigned hasFloatAtomicAdd : 1;  ///< 32-bit float atomic add in global and shared memory.
+typedef struct
+{
+  // 32-bit Atomics
+  unsigned hasGlobalInt32Atomics : 1;     // 32-bit integer atomics for global memory.
+  unsigned hasGlobalFloatAtomicExch : 1;  // 32-bit float atomic exch for global memory.
+  unsigned hasSharedInt32Atomics : 1;     // 32-bit integer atomics for shared memory.
+  unsigned hasSharedFloatAtomicExch : 1;  // 32-bit float atomic exch for shared memory.
+  unsigned hasFloatAtomicAdd : 1;         // 32-bit float atomic add in global and shared memory.
 
-    // 64-bit Atomics
-    unsigned hasGlobalInt64Atomics : 1;  ///< 64-bit integer atomics for global memory.
-    unsigned hasSharedInt64Atomics : 1;  ///< 64-bit integer atomics for shared memory.
+  // 64-bit Atomics
+  unsigned hasGlobalInt64Atomics : 1;     // 64-bit integer atomics for global memory.
+  unsigned hasSharedInt64Atomics : 1;     // 64-bit integer atomics for shared memory.
 
-    // Doubles
-    unsigned hasDoubles : 1;  ///< Double-precision floating point.
+  // Doubles
+  unsigned hasDoubles : 1;                // Double-precision floating point.
 
-    // Warp cross-lane operations
-    unsigned hasWarpVote : 1;     ///< Warp vote instructions (__any, __all).
-    unsigned hasWarpBallot : 1;   ///< Warp ballot instructions (__ballot).
-    unsigned hasWarpShuffle : 1;  ///< Warp shuffle operations. (__shfl_*).
-    unsigned hasFunnelShift : 1;  ///< Funnel two words into one with shift&mask caps.
+  // Warp cross-lane operations
+  unsigned hasWarpVote : 1;               // Warp vote instructions (__any, __all).
+  unsigned hasWarpBallot : 1;             // Warp ballot instructions (__ballot).
+  unsigned hasWarpShuffle : 1;            // Warp shuffle operations. (__shfl_*).
+  unsigned hasFunnelShift : 1;            // Funnel two words into one with shift&mask caps.
 
-    // Sync
-    unsigned hasThreadFenceSystem : 1;  ///< __threadfence_system.
-    unsigned hasSyncThreadsExt : 1;     ///< __syncthreads_count, syncthreads_and, syncthreads_or.
+  // Sync
+  unsigned hasThreadFenceSystem : 1;      // __threadfence_system.
+  unsigned hasSyncThreadsExt : 1;         // __syncthreads_count, syncthreads_and, syncthreads_or.
+
+  // Misc
+  unsigned hasSurfaceFuncs : 1;           // Surface functions.
+  unsigned has3dGrid : 1;                 // Grid and group dims are 3D (rather than 2D).
+  unsigned hasDynamicParallelism : 1;     // Dynamic parallelism.
 
-    // Misc
-    unsigned hasSurfaceFuncs : 1;        ///< Surface functions.
-    unsigned has3dGrid : 1;              ///< Grid and group dims are 3D (rather than 2D).
-    unsigned hasDynamicParallelism : 1;  ///< Dynamic parallelism.
 } hipDeviceArch_t;
 
-typedef struct hipUUID_t {
-    char bytes[16];
+typedef struct hipUUID_t
+{
+  char bytes[16];
+
 } hipUUID;
 
 
@@ -349,149 +333,127 @@ typedef struct hipUUID_t {
  * hipDeviceProp
  *
  */
-typedef struct hipDeviceProp_t {
-    char name[256];                   ///< Device name.
-    hipUUID uuid;                     ///< UUID of a device
-    char luid[8];                     ///< 8-byte unique identifier. Only valid on windows
-    unsigned int luidDeviceNodeMask;  ///< LUID node mask
-    size_t totalGlobalMem;            ///< Size of global memory region (in bytes).
-    size_t sharedMemPerBlock;         ///< Size of shared memory per block (in bytes).
-    int regsPerBlock;                 ///< Registers per block.
-    int warpSize;                     ///< Warp size.
-    size_t memPitch;                  ///< Maximum pitch in bytes allowed by memory copies
-                                      ///< pitched memory
-    int maxThreadsPerBlock;           ///< Max work items per work group or workgroup max size.
-    int maxThreadsDim[3];             ///< Max number of threads in each dimension (XYZ) of a block.
-    int maxGridSize[3];               ///< Max grid dimensions (XYZ).
-    int clockRate;                    ///< Max clock frequency of the multiProcessors in khz.
-    size_t totalConstMem;             ///< Size of shared constant memory region on the device
-                                      ///< (in bytes).
-    int major;  ///< Major compute capability.  On HCC, this is an approximation and features may
-                ///< differ from CUDA CC.  See the arch feature flags for portable ways to query
-                ///< feature caps.
-    int minor;  ///< Minor compute capability.  On HCC, this is an approximation and features may
-                ///< differ from CUDA CC.  See the arch feature flags for portable ways to query
-                ///< feature caps.
-    size_t textureAlignment;       ///< Alignment requirement for textures
-    size_t texturePitchAlignment;  ///< Pitch alignment requirement for texture references bound to
-    int deviceOverlap;             ///< Deprecated. Use asyncEngineCount instead
-    int multiProcessorCount;       ///< Number of multi-processors (compute units).
-    int kernelExecTimeoutEnabled;  ///< Run time limit for kernels executed on the device
-    int integrated;                ///< APU vs dGPU
-    int canMapHostMemory;          ///< Check whether HIP can map host memory
-    int computeMode;               ///< Compute mode.
-    int maxTexture1D;              ///< Maximum number of elements in 1D images
-    int maxTexture1DMipmap;        ///< Maximum 1D mipmap texture size
-    int maxTexture1DLinear;        ///< Maximum size for 1D textures bound to linear memory
-    int maxTexture2D[2];  ///< Maximum dimensions (width, height) of 2D images, in image elements
-    int maxTexture2DMipmap[2];  ///< Maximum number of elements in 2D array mipmap of images
-    int maxTexture2DLinear[3];  ///< Maximum 2D tex dimensions if tex are bound to pitched memory
-    int maxTexture2DGather[2];  ///< Maximum 2D tex dimensions if gather has to be performed
-    int maxTexture3D[3];  ///< Maximum dimensions (width, height, depth) of 3D images, in image
-                          ///< elements
-    int maxTexture3DAlt[3];           ///< Maximum alternate 3D texture dims
-    int maxTextureCubemap;            ///< Maximum cubemap texture dims
-    int maxTexture1DLayered[2];       ///< Maximum number of elements in 1D array images
-    int maxTexture2DLayered[3];       ///< Maximum number of elements in 2D array images
-    int maxTextureCubemapLayered[2];  ///< Maximum cubemaps layered texture dims
-    int maxSurface1D;                 ///< Maximum 1D surface size
-    int maxSurface2D[2];              ///< Maximum 2D surface size
-    int maxSurface3D[3];              ///< Maximum 3D surface size
-    int maxSurface1DLayered[2];       ///< Maximum 1D layered surface size
-    int maxSurface2DLayered[3];       ///< Maximum 2D layared surface size
-    int maxSurfaceCubemap;            ///< Maximum cubemap surface size
-    int maxSurfaceCubemapLayered[2];  ///< Maximum cubemap layered surface size
-    size_t surfaceAlignment;          ///< Alignment requirement for surface
-    int concurrentKernels;         ///< Device can possibly execute multiple kernels concurrently.
-    int ECCEnabled;                ///< Device has ECC support enabled
-    int pciBusID;                  ///< PCI Bus ID.
-    int pciDeviceID;               ///< PCI Device ID.
-    int pciDomainID;               ///< PCI Domain ID
-    int tccDriver;                 ///< 1:If device is Tesla device using TCC driver, else 0
-    int asyncEngineCount;          ///< Number of async engines
-    int unifiedAddressing;         ///< Does device and host share unified address space
-    int memoryClockRate;           ///< Max global memory clock frequency in khz.
-    int memoryBusWidth;            ///< Global memory bus width in bits.
-    int l2CacheSize;               ///< L2 cache size.
-    int persistingL2CacheMaxSize;  ///< Device's max L2 persisting lines in bytes
-    int maxThreadsPerMultiProcessor;    ///< Maximum resident threads per multi-processor.
-    int streamPrioritiesSupported;      ///< Device supports stream priority
-    int globalL1CacheSupported;         ///< Indicates globals are cached in L1
-    int localL1CacheSupported;          ///< Locals are cahced in L1
-    size_t sharedMemPerMultiprocessor;  ///< Amount of shared memory available per multiprocessor.
-    int regsPerMultiprocessor;          ///< registers available per multiprocessor
-    int managedMemory;         ///< Device supports allocating managed memory on this system
-    int isMultiGpuBoard;       ///< 1 if device is on a multi-GPU board, 0 if not.
-    int multiGpuBoardGroupID;  ///< Unique identifier for a group of devices on same multiboard GPU
-    int hostNativeAtomicSupported;         ///< Link between host and device supports native atomics
-    int singleToDoublePrecisionPerfRatio;  ///< Deprecated. CUDA only.
-    int pageableMemoryAccess;              ///< Device supports coherently accessing pageable memory
-                                           ///< without calling hipHostRegister on it
-    int concurrentManagedAccess;  ///< Device can coherently access managed memory concurrently with
-                                  ///< the CPU
-    int computePreemptionSupported;         ///< Is compute preemption supported on the device
-    int canUseHostPointerForRegisteredMem;  ///< Device can access host registered memory with same
-                                            ///< address as the host
-    int cooperativeLaunch;                  ///< HIP device supports cooperative launch
-    int cooperativeMultiDeviceLaunch;       ///< HIP device supports cooperative launch on multiple
-                                            ///< devices
-    size_t
-        sharedMemPerBlockOptin;  ///< Per device m ax shared mem per block usable by special opt in
-    int pageableMemoryAccessUsesHostPageTables;  ///< Device accesses pageable memory via the host's
-                                                 ///< page tables
-    int directManagedMemAccessFromHost;  ///< Host can directly access managed memory on the device
-                                         ///< without migration
-    int maxBlocksPerMultiProcessor;      ///< Max number of blocks on CU
-    int accessPolicyMaxWindowSize;       ///< Max value of access policy window
-    size_t reservedSharedMemPerBlock;    ///< Shared memory reserved by driver per block
-    int hostRegisterSupported;           ///< Device supports hipHostRegister
-    int sparseHipArraySupported;         ///< Indicates if device supports sparse hip arrays
-    int hostRegisterReadOnlySupported;   ///< Device supports using the hipHostRegisterReadOnly flag
-                                         ///< with hipHostRegistger
-    int timelineSemaphoreInteropSupported;  ///< Indicates external timeline semaphore support
-    int memoryPoolsSupported;  ///< Indicates if device supports hipMallocAsync and hipMemPool APIs
-    int gpuDirectRDMASupported;                    ///< Indicates device support of RDMA APIs
-    unsigned int gpuDirectRDMAFlushWritesOptions;  ///< Bitmask to be interpreted according to
-                                                   ///< hipFlushGPUDirectRDMAWritesOptions
-    int gpuDirectRDMAWritesOrdering;               ///< value of hipGPUDirectRDMAWritesOrdering
-    unsigned int
-        memoryPoolSupportedHandleTypes;  ///< Bitmask of handle types support with mempool based IPC
-    int deferredMappingHipArraySupported;  ///< Device supports deferred mapping HIP arrays and HIP
-                                           ///< mipmapped arrays
-    int ipcEventSupported;                 ///< Device supports IPC events
-    int clusterLaunch;                     ///< Device supports cluster launch
-    int unifiedFunctionPointers;           ///< Indicates device supports unified function pointers
-    int reserved[63];                      ///< CUDA Reserved.
+typedef struct hipDeviceProp_t
+{
+  char name[256];                                // Device name.
+  hipUUID uuid;                                  // UUID of a device
+  char luid[8];                                  // 8-byte unique identifier. Only valid on windows
+  unsigned int luidDeviceNodeMask;               // LUID node mask
+  size_t totalGlobalMem;                         // Size of global memory region (in bytes).
+  size_t sharedMemPerBlock;                      // Size of shared memory per block (in bytes).
+  int regsPerBlock;                              // Registers per block.
+  int warpSize;                                  // Warp size.
+  size_t memPitch;                               // Maximum pitch in bytes allowed by memory copies pitched memory
+  int maxThreadsPerBlock;                        // Max work items per work group or workgroup max size.
+  int maxThreadsDim[3];                          // Max number of threads in each dimension (XYZ) of a block.
+  int maxGridSize[3];                            // Max grid dimensions (XYZ).
+  int clockRate;                                 // Max clock frequency of the multiProcessors in khz.
+  size_t totalConstMem;                          // Size of shared constant memory region on the device (in bytes).
+  int major;                                     // Major compute capability. On HCC, this is an approximation and features may
+                                                 // differ from CUDA CC.  See the arch feature flags for portable ways to query feature caps.
+  int minor;                                     // Minor compute capability. On HCC, this is an approximation and features may
+                                                 // differ from CUDA CC.  See the arch feature flags for portable ways to query feature caps.
+  size_t textureAlignment;                       // Alignment requirement for textures
+  size_t texturePitchAlignment;                  // Pitch alignment requirement for texture references bound to pitched memory
+  int deviceOverlap;                             // Deprecated. Use asyncEngineCount instead
+  int multiProcessorCount;                       // Number of multi-processors (compute units).
+  int kernelExecTimeoutEnabled;                  // Run time limit for kernels executed on the device
+  int integrated;                                // APU vs dGPU
+  int canMapHostMemory;                          // Check whether HIP can map host memory
+  int computeMode;                               // Compute mode.
+  int maxTexture1D;                              // Maximum number of elements in 1D images
+  int maxTexture1DMipmap;                        // Maximum 1D mipmap texture size
+  int maxTexture1DLinear;                        // Maximum size for 1D textures bound to linear memory
+  int maxTexture2D[2];                           // Maximum dimensions (width, height) of 2D images, in image elements
+  int maxTexture2DMipmap[2];                     // Maximum number of elements in 2D array mipmap of images
+  int maxTexture2DLinear[3];                     // Maximum 2D tex dimensions if tex are bound to pitched memory
+  int maxTexture2DGather[2];                     // Maximum 2D tex dimensions if gather has to be performed
+  int maxTexture3D[3];                           // Maximum dimensions (width, height, depth) of 3D images, in image elements
+  int maxTexture3DAlt[3];                        // Maximum alternate 3D texture dims
+  int maxTextureCubemap;                         // Maximum cubemap texture dims
+  int maxTexture1DLayered[2];                    // Maximum number of elements in 1D array images
+  int maxTexture2DLayered[3];                    // Maximum number of elements in 2D array images
+  int maxTextureCubemapLayered[2];               // Maximum cubemaps layered texture dims
+  int maxSurface1D;                              // Maximum 1D surface size
+  int maxSurface2D[2];                           // Maximum 2D surface size
+  int maxSurface3D[3];                           // Maximum 3D surface size
+  int maxSurface1DLayered[2];                    // Maximum 1D layered surface size
+  int maxSurface2DLayered[3];                    // Maximum 2D layered surface size
+  int maxSurfaceCubemap;                         // Maximum cubemap surface size
+  int maxSurfaceCubemapLayered[2];               // Maximum cubemap layered surface size
+  size_t surfaceAlignment;                       // Alignment requirement for surface
+  int concurrentKernels;                         // Device can possibly execute multiple kernels concurrently.
+  int ECCEnabled;                                // Device has ECC support enabled
+  int pciBusID;                                  // PCI Bus ID.
+  int pciDeviceID;                               // PCI Device ID.
+  int pciDomainID;                               // PCI Domain ID
+  int tccDriver;                                 // 1:If device is Tesla device using TCC driver, else 0
+  int asyncEngineCount;                          // Number of async engines
+  int unifiedAddressing;                         // Does device and host share unified address space
+  int memoryClockRate;                           // Max global memory clock frequency in khz.
+  int memoryBusWidth;                            // Global memory bus width in bits.
+  int l2CacheSize;                               // L2 cache size.
+  int persistingL2CacheMaxSize;                  // Device's max L2 persisting lines in bytes
+  int maxThreadsPerMultiProcessor;               // Maximum resident threads per multi-processor.
+  int streamPrioritiesSupported;                 // Device supports stream priority
+  int globalL1CacheSupported;                    // Indicates globals are cached in L1
+  int localL1CacheSupported;                     // Locals are cached in L1
+  size_t sharedMemPerMultiprocessor;             // Amount of shared memory available per multiprocessor.
+  int regsPerMultiprocessor;                     // registers available per multiprocessor
+  int managedMemory;                             // Device supports allocating managed memory on this system
+  int isMultiGpuBoard;                           // 1 if device is on a multi-GPU board, 0 if not.
+  int multiGpuBoardGroupID;                      // Unique identifier for a group of devices on same multiboard GPU
+  int hostNativeAtomicSupported;                 // Link between host and device supports native atomics
+  int singleToDoublePrecisionPerfRatio;          // Deprecated. CUDA only.
+  int pageableMemoryAccess;                      // Device supports coherently accessing pageable memory
+                                                 // without calling hipHostRegister on it
+  int concurrentManagedAccess;                   // Device can coherently access managed memory concurrently with the CPU
+  int computePreemptionSupported;                // Is compute preemption supported on the device
+  int canUseHostPointerForRegisteredMem;         // Device can access host registered memory with same address as the host
+  int cooperativeLaunch;                         // HIP device supports cooperative launch
+  int cooperativeMultiDeviceLaunch;              // HIP device supports cooperative launch on multiple devices
+  size_t sharedMemPerBlockOptin;                 // Per device max shared mem per block usable by special opt in
+  int pageableMemoryAccessUsesHostPageTables;    // Device accesses pageable memory via the host's page tables
+  int directManagedMemAccessFromHost;            // Host can directly access managed memory on the device without migration
+  int maxBlocksPerMultiProcessor;                // Max number of blocks on CU
+  int accessPolicyMaxWindowSize;                 // Max value of access policy window
+  size_t reservedSharedMemPerBlock;              // Shared memory reserved by driver per block
+  int hostRegisterSupported;                     // Device supports hipHostRegister
+  int sparseHipArraySupported;                   // Indicates if device supports sparse hip arrays
+  int hostRegisterReadOnlySupported;             // Device supports using the hipHostRegisterReadOnly flag with hipHostRegister
+  int timelineSemaphoreInteropSupported;         // Indicates external timeline semaphore support
+  int memoryPoolsSupported;                      // Indicates if device supports hipMallocAsync and hipMemPool APIs
+  int gpuDirectRDMASupported;                    // Indicates device support of RDMA APIs
+  unsigned int gpuDirectRDMAFlushWritesOptions;  // Bitmask to be interpreted according to hipFlushGPUDirectRDMAWritesOptions
+  int gpuDirectRDMAWritesOrdering;               // value of hipGPUDirectRDMAWritesOrdering
+  unsigned int memoryPoolSupportedHandleTypes;   // Bitmask of handle types support with mempool based IPC
+  int deferredMappingHipArraySupported;          // Device supports deferred mapping HIP arrays and HIP mipmapped arrays
+  int ipcEventSupported;                         // Device supports IPC events
+  int clusterLaunch;                             // Device supports cluster launch
+  int unifiedFunctionPointers;                   // Indicates device supports unified function pointers
+  int reserved[63];                              // CUDA Reserved.
 
-    int hipReserved[32];  ///< Reserved for adding new entries for HIP/CUDA.
+  int hipReserved[32];                           // Reserved for adding new entries for HIP/CUDA.
+
+  /* HIP Only struct members */
+  char gcnArchName[256];                         // AMD GCN Arch Name. HIP Only.
+  size_t maxSharedMemoryPerMultiProcessor;       // Maximum Shared Memory Per CU. HIP Only.
+  int clockInstructionRate;                      // Frequency in khz of the timer used by the device-side "clock*" instructions. New for HIP.
+  hipDeviceArch_t arch;                          // Architectural feature flags.  New for HIP.
+  unsigned int* hdpMemFlushCntl;                 // Address of HDP_MEM_COHERENCY_FLUSH_CNTL register
+  unsigned int* hdpRegFlushCntl;                 // Address of HDP_REG_COHERENCY_FLUSH_CNTL register
+  int cooperativeMultiDeviceUnmatchedFunc;       // HIP device supports cooperative launch on multiple devices with unmatched functions
+  int cooperativeMultiDeviceUnmatchedGridDim;    // HIP device supports cooperative launch on multiple devices with unmatched grid dimensions
+  int cooperativeMultiDeviceUnmatchedBlockDim;   // HIP device supports cooperative launch on multiple devices with unmatched block dimensions
+  int cooperativeMultiDeviceUnmatchedSharedMem;  // HIP device supports cooperative launch on multiple devices with unmatched shared memories
+  int isLargeBar;                                // 1: if it is a large PCI bar device, else 0
+  int asicRevision;                              // Revision of the GPU in this device
 
-    /* HIP Only struct members */
-    char gcnArchName[256];                    ///< AMD GCN Arch Name. HIP Only.
-    size_t maxSharedMemoryPerMultiProcessor;  ///< Maximum Shared Memory Per CU. HIP Only.
-    int clockInstructionRate;  ///< Frequency in khz of the timer used by the device-side "clock*"
-                               ///< instructions.  New for HIP.
-    hipDeviceArch_t arch;      ///< Architectural feature flags.  New for HIP.
-    unsigned int* hdpMemFlushCntl;            ///< Addres of HDP_MEM_COHERENCY_FLUSH_CNTL register
-    unsigned int* hdpRegFlushCntl;            ///< Addres of HDP_REG_COHERENCY_FLUSH_CNTL register
-    int cooperativeMultiDeviceUnmatchedFunc;  ///< HIP device supports cooperative launch on
-                                              ///< multiple
-                                              /// devices with unmatched functions
-    int cooperativeMultiDeviceUnmatchedGridDim;    ///< HIP device supports cooperative launch on
-                                                   ///< multiple
-                                                   /// devices with unmatched grid dimensions
-    int cooperativeMultiDeviceUnmatchedBlockDim;   ///< HIP device supports cooperative launch on
-                                                   ///< multiple
-                                                   /// devices with unmatched block dimensions
-    int cooperativeMultiDeviceUnmatchedSharedMem;  ///< HIP device supports cooperative launch on
-                                                   ///< multiple
-                                                   /// devices with unmatched shared memories
-    int isLargeBar;                                ///< 1: if it is a large PCI bar device, else 0
-    int asicRevision;                              ///< Revision of the GPU in this device
 } hipDeviceProp_t;
 
 //Flags that can be used with hipStreamCreateWithFlags.
 /** Default stream creation flags. These are used with hipStreamCreate().*/
-#define hipStreamDefault  0x00
+#define hipStreamDefault 0x00
 
 /** Stream does not implicitly synchronize with null stream.*/
 #define hipStreamNonBlocking 0x01
@@ -511,46 +473,48 @@ typedef struct hipDeviceProp_t {
 
 /** Use a device-scope release when recording this event. This flag is useful to obtain more
  * precise timings of commands between events.  The flag is a no-op on CUDA platforms.*/
-#define hipEventReleaseToDevice  0x40000000
+#define hipEventReleaseToDevice 0x40000000
 
 /** Use a system-scope release when recording this event. This flag is useful to make
  * non-coherent host memory visible to the host. The flag is a no-op on CUDA platforms.*/
-#define hipEventReleaseToSystem  0x80000000
+#define hipEventReleaseToSystem 0x80000000
 
 /** Automatically select between Spin and Yield.*/
 #define hipDeviceScheduleAuto 0x0
 
 /** Dedicate a CPU core to spin-wait. Provides lowest latency, but burns a CPU core and may
  * consume more power.*/
-#define hipDeviceScheduleSpin  0x1
+#define hipDeviceScheduleSpin 0x1
 
 /** Yield the CPU to the operating system when waiting. May increase latency, but lowers power
  * and is friendlier to other threads in the system.*/
-#define hipDeviceScheduleYield  0x2
+#define hipDeviceScheduleYield 0x2
 #define hipDeviceScheduleBlockingSync 0x4
 #define hipDeviceScheduleMask 0x7
 #define hipDeviceMapHost 0x8
 #define hipDeviceLmemResizeToMax 0x16
 
-typedef enum hipJitOption {
-    hipJitOptionMaxRegisters = 0,
-    hipJitOptionThreadsPerBlock,
-    hipJitOptionWallTime,
-    hipJitOptionInfoLogBuffer,
-    hipJitOptionInfoLogBufferSizeBytes,
-    hipJitOptionErrorLogBuffer,
-    hipJitOptionErrorLogBufferSizeBytes,
-    hipJitOptionOptimizationLevel,
-    hipJitOptionTargetFromContext,
-    hipJitOptionTarget,
-    hipJitOptionFallbackStrategy,
-    hipJitOptionGenerateDebugInfo,
-    hipJitOptionLogVerbose,
-    hipJitOptionGenerateLineInfo,
-    hipJitOptionCacheMode,
-    hipJitOptionSm3xOpt,
-    hipJitOptionFastCompile,
-    hipJitOptionNumOptions
+typedef enum hipJitOption
+{
+  hipJitOptionMaxRegisters = 0,
+  hipJitOptionThreadsPerBlock,
+  hipJitOptionWallTime,
+  hipJitOptionInfoLogBuffer,
+  hipJitOptionInfoLogBufferSizeBytes,
+  hipJitOptionErrorLogBuffer,
+  hipJitOptionErrorLogBufferSizeBytes,
+  hipJitOptionOptimizationLevel,
+  hipJitOptionTargetFromContext,
+  hipJitOptionTarget,
+  hipJitOptionFallbackStrategy,
+  hipJitOptionGenerateDebugInfo,
+  hipJitOptionLogVerbose,
+  hipJitOptionGenerateLineInfo,
+  hipJitOptionCacheMode,
+  hipJitOptionSm3xOpt,
+  hipJitOptionFastCompile,
+  hipJitOptionNumOptions
+
 } hipJitOption;
 
 // stop: hip_runtime_api.h
@@ -563,11 +527,17 @@ typedef enum hipJitOption {
 
 #define HIP_API_CALL HIPAPI
 
+// deprecated
 typedef hipError_t (HIP_API_CALL *HIP_HIPCTXCREATE)              (hipCtx_t *, unsigned int, hipDevice_t);
+// deprecated
 typedef hipError_t (HIP_API_CALL *HIP_HIPCTXDESTROY)             (hipCtx_t);
+// deprecated
 typedef hipError_t (HIP_API_CALL *HIP_HIPCTXPOPCURRENT)          (hipCtx_t *);
+// deprecated
 typedef hipError_t (HIP_API_CALL *HIP_HIPCTXPUSHCURRENT)         (hipCtx_t);
+// deprecated
 typedef hipError_t (HIP_API_CALL *HIP_HIPCTXSETCURRENT)          (hipCtx_t);
+// deprecated
 typedef hipError_t (HIP_API_CALL *HIP_HIPCTXSYNCHRONIZE)         (void);
 typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICEGETATTRIBUTE)     (int *, hipDeviceAttribute_t, hipDevice_t);
 typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICEGETCOUNT)         (int *);
@@ -575,7 +545,8 @@ typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICEGET)              (hipDevice_t *,
 typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICEGETNAME)          (char *, int, hipDevice_t);
 typedef hipError_t (HIP_API_CALL *HIP_HIPDEVICETOTALMEM)         (size_t *, hipDevice_t);
 typedef hipError_t (HIP_API_CALL *HIP_HIPDRIVERGETVERSION)       (int *);
-typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTCREATE)            (hipEvent_t *, unsigned int);
+typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTCREATE)            (hipEvent_t *);
+typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTCREATEWITHFLAGS)   (hipEvent_t *, unsigned int);
 typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTDESTROY)           (hipEvent_t);
 typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTELAPSEDTIME)       (float *, hipEvent_t, hipEvent_t);
 typedef hipError_t (HIP_API_CALL *HIP_HIPEVENTRECORD)            (hipEvent_t, hipStream_t);
@@ -603,21 +574,30 @@ typedef hipError_t (HIP_API_CALL *HIP_HIPMODULEGETGLOBAL)        (hipDeviceptr_t
 typedef hipError_t (HIP_API_CALL *HIP_HIPMODULELOADDATAEX)       (hipModule_t *, const void *, unsigned int, hipJitOption *, void **);
 typedef hipError_t (HIP_API_CALL *HIP_HIPMODULEUNLOAD)           (hipModule_t);
 typedef hipError_t (HIP_API_CALL *HIP_HIPRUNTIMEGETVERSION)      (int *);
-typedef hipError_t (HIP_API_CALL *HIP_HIPSTREAMCREATE)           (hipStream_t *, unsigned int);
+typedef hipError_t (HIP_API_CALL *HIP_HIPSETDEVICE)              (hipDevice_t);
+typedef hipError_t (HIP_API_CALL *HIP_HIPSETDEVICEFLAGS)         (unsigned int);
+typedef hipError_t (HIP_API_CALL *HIP_HIPSTREAMCREATE)           (hipStream_t *);
+typedef hipError_t (HIP_API_CALL *HIP_HIPSTREAMCREATEWITHFLAGS)  (hipStream_t *, unsigned int);
 typedef hipError_t (HIP_API_CALL *HIP_HIPSTREAMDESTROY)          (hipStream_t);
 typedef hipError_t (HIP_API_CALL *HIP_HIPSTREAMSYNCHRONIZE)      (hipStream_t);
 typedef hipError_t (HIP_API_CALL *HIP_HIPGETDEVICEPROPERTIES)    (hipDeviceProp_t *, hipDevice_t);
-typedef hipError_t (HIP_API_CALL *HIP_HIPMODULEOCCUPANCYMAXACTIVEBLOCKSPERMULTIPROCESSOR)    (int *, hipFunction_t, int, size_t);
+typedef hipError_t (HIP_API_CALL *HIP_HIPMODULEOCCUPANCYMAXACTIVEBLOCKSPERMULTIPROCESSOR)  (int *, hipFunction_t, int, size_t);
 
 typedef struct hc_hip_lib
 {
   hc_dynlib_t lib;
 
+  // deprecated
   HIP_HIPCTXCREATE              hipCtxCreate;
+  // deprecated
   HIP_HIPCTXDESTROY             hipCtxDestroy;
+  // deprecated
   HIP_HIPCTXPOPCURRENT          hipCtxPopCurrent;
+  // deprecated
   HIP_HIPCTXPUSHCURRENT         hipCtxPushCurrent;
+  // deprecated
   HIP_HIPCTXSETCURRENT          hipCtxSetCurrent;
+  // deprecated
   HIP_HIPCTXSYNCHRONIZE         hipCtxSynchronize;
   HIP_HIPDEVICEGETATTRIBUTE     hipDeviceGetAttribute;
   HIP_HIPDEVICEGETCOUNT         hipDeviceGetCount;
@@ -626,6 +606,7 @@ typedef struct hc_hip_lib
   HIP_HIPDEVICETOTALMEM         hipDeviceTotalMem;
   HIP_HIPDRIVERGETVERSION       hipDriverGetVersion;
   HIP_HIPEVENTCREATE            hipEventCreate;
+  HIP_HIPEVENTCREATEWITHFLAGS   hipEventCreateWithFlags;
   HIP_HIPEVENTDESTROY           hipEventDestroy;
   HIP_HIPEVENTELAPSEDTIME       hipEventElapsedTime;
   HIP_HIPEVENTRECORD            hipEventRecord;
@@ -653,62 +634,75 @@ typedef struct hc_hip_lib
   HIP_HIPMODULELOADDATAEX       hipModuleLoadDataEx;
   HIP_HIPMODULEUNLOAD           hipModuleUnload;
   HIP_HIPRUNTIMEGETVERSION      hipRuntimeGetVersion;
+  HIP_HIPSETDEVICE              hipSetDevice;
+  HIP_HIPSETDEVICEFLAGS         hipSetDeviceFlags;
   HIP_HIPSTREAMCREATE           hipStreamCreate;
+  HIP_HIPSTREAMCREATEWITHFLAGS  hipStreamCreateWithFlags;
   HIP_HIPSTREAMDESTROY          hipStreamDestroy;
   HIP_HIPSTREAMSYNCHRONIZE      hipStreamSynchronize;
   HIP_HIPGETDEVICEPROPERTIES    hipGetDeviceProperties;
-  HIP_HIPMODULEOCCUPANCYMAXACTIVEBLOCKSPERMULTIPROCESSOR 	hipModuleOccupancyMaxActiveBlocksPerMultiprocessor;
+  HIP_HIPMODULEOCCUPANCYMAXACTIVEBLOCKSPERMULTIPROCESSOR  hipModuleOccupancyMaxActiveBlocksPerMultiprocessor;
 
 } hc_hip_lib_t;
 
 typedef hc_hip_lib_t HIP_PTR;
 
-int  hip_init                  (void *hashcat_ctx);
-void hip_close                 (void *hashcat_ctx);
+int  hip_init                   (void *hashcat_ctx);
+void hip_close                  (void *hashcat_ctx);
 
-int hc_hipCtxCreate            (void *hashcat_ctx, hipCtx_t *pctx, unsigned int flags, hipDevice_t dev);
-int hc_hipCtxDestroy           (void *hashcat_ctx, hipCtx_t ctx);
-int hc_hipCtxPopCurrent        (void *hashcat_ctx, hipCtx_t *pctx);
-int hc_hipCtxPushCurrent       (void *hashcat_ctx, hipCtx_t ctx);
-int hc_hipCtxSetCurrent        (void *hashcat_ctx, hipCtx_t ctx);
-int hc_hipCtxSynchronize       (void *hashcat_ctx);
-int hc_hipDeviceGet            (void *hashcat_ctx, hipDevice_t *device, int ordinal);
-int hc_hipDeviceGetAttribute   (void *hashcat_ctx, int *pi, hipDeviceAttribute_t attrib, hipDevice_t dev);
-int hc_hipDeviceGetCount       (void *hashcat_ctx, int *count);
-int hc_hipDeviceGetName        (void *hashcat_ctx, char *name, int len, hipDevice_t dev);
-int hc_hipDeviceTotalMem       (void *hashcat_ctx, size_t *bytes, hipDevice_t dev);
-int hc_hipDriverGetVersion     (void *hashcat_ctx, int *driverVersion);
-int hc_hipEventCreate          (void *hashcat_ctx, hipEvent_t *phEvent, unsigned int Flags);
-int hc_hipEventDestroy         (void *hashcat_ctx, hipEvent_t hEvent);
-int hc_hipEventElapsedTime     (void *hashcat_ctx, float *pMilliseconds, hipEvent_t hStart, hipEvent_t hEnd);
-int hc_hipEventQuery           (void *hashcat_ctx, hipEvent_t hEvent);
-int hc_hipEventRecord          (void *hashcat_ctx, hipEvent_t hEvent, hipStream_t hStream);
-int hc_hipEventSynchronize     (void *hashcat_ctx, hipEvent_t hEvent);
-int hc_hipFuncGetAttribute     (void *hashcat_ctx, int *pi, hipFunction_attribute attrib, hipFunction_t hfunc);
-int hc_hipInit                 (void *hashcat_ctx, unsigned int Flags);
-int hc_hipLaunchKernel         (void *hashcat_ctx, hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, hipStream_t hStream, void **kernelParams, void **extra);
-int hc_hipMemAlloc             (void *hashcat_ctx, hipDeviceptr_t *dptr, size_t bytesize);
-int hc_hipMemFree              (void *hashcat_ctx, hipDeviceptr_t dptr);
-int hc_hipMemGetInfo           (void *hashcat_ctx, size_t *free, size_t *total);
-int hc_hipMemcpyDtoD           (void *hashcat_ctx, hipDeviceptr_t dstDevice, hipDeviceptr_t srcDevice, size_t ByteCount);
-int hc_hipMemcpyDtoH           (void *hashcat_ctx, void *dstHost, hipDeviceptr_t srcDevice, size_t ByteCount);
-int hc_hipMemcpyHtoD           (void *hashcat_ctx, hipDeviceptr_t dstDevice, const void *srcHost, size_t ByteCount);
-int hc_hipMemsetD32            (void *hashcat_ctx, hipDeviceptr_t dstDevice, unsigned int ui, size_t N);
-int hc_hipMemsetD8             (void *hashcat_ctx, hipDeviceptr_t dstDevice, unsigned char uc, size_t N);
-int hc_hipMemcpyDtoDAsync      (void *hashcat_ctx, hipDeviceptr_t dstDevice, hipDeviceptr_t srcDevice, size_t ByteCount, hipStream_t hStream);
-int hc_hipMemcpyDtoHAsync      (void *hashcat_ctx, void *dstHost, hipDeviceptr_t srcDevice, size_t ByteCount, hipStream_t hStream);
-int hc_hipMemcpyHtoDAsync      (void *hashcat_ctx, hipDeviceptr_t dstDevice, const void *srcHost, size_t ByteCount, hipStream_t hStream);
-int hc_hipMemsetD32Async       (void *hashcat_ctx, hipDeviceptr_t dstDevice, unsigned int ui, size_t N, hipStream_t hStream);
-int hc_hipMemsetD8Async        (void *hashcat_ctx, hipDeviceptr_t dstDevice, unsigned char uc, size_t N, hipStream_t hStream);
-int hc_hipModuleGetFunction    (void *hashcat_ctx, hipFunction_t *hfunc, hipModule_t hmod, const char *name);
-int hc_hipModuleGetGlobal      (void *hashcat_ctx, hipDeviceptr_t *dptr, size_t *bytes, hipModule_t hmod, const char *name);
-int hc_hipModuleLoadDataEx     (void *hashcat_ctx, hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues);
-int hc_hipModuleUnload         (void *hashcat_ctx, hipModule_t hmod);
-int hc_hipRuntimeGetVersion    (void *hashcat_ctx, int *runtimeVersion);
-int hc_hipStreamCreate         (void *hashcat_ctx, hipStream_t *phStream, unsigned int Flags);
-int hc_hipStreamDestroy        (void *hashcat_ctx, hipStream_t hStream);
-int hc_hipStreamSynchronize    (void *hashcat_ctx, hipStream_t hStream);
-int hc_hipGetDeviceProperties  (void *hashcat_ctx, hipDeviceProp_t *prop, hipDevice_t dev);
+  // deprecated
+int hc_hipCtxCreate             (void *hashcat_ctx, hipCtx_t *pctx, unsigned int flags, hipDevice_t dev);
+  // deprecated
+int hc_hipCtxDestroy            (void *hashcat_ctx, hipCtx_t ctx);
+  // deprecated
+int hc_hipCtxPopCurrent         (void *hashcat_ctx, hipCtx_t *pctx);
+  // deprecated
+int hc_hipCtxPushCurrent        (void *hashcat_ctx, hipCtx_t ctx);
+  // deprecated
+int hc_hipCtxSetCurrent         (void *hashcat_ctx, hipCtx_t ctx);
+  // deprecated
+int hc_hipCtxSynchronize        (void *hashcat_ctx);
+int hc_hipDeviceGet             (void *hashcat_ctx, hipDevice_t *device, int ordinal);
+int hc_hipDeviceGetAttribute    (void *hashcat_ctx, int *pi, hipDeviceAttribute_t attrib, hipDevice_t dev);
+int hc_hipDeviceGetCount        (void *hashcat_ctx, int *count);
+int hc_hipDeviceGetName         (void *hashcat_ctx, char *name, int len, hipDevice_t dev);
+int hc_hipDeviceTotalMem        (void *hashcat_ctx, size_t *bytes, hipDevice_t dev);
+int hc_hipDriverGetVersion      (void *hashcat_ctx, int *driverVersion);
+int hc_hipEventCreate           (void *hashcat_ctx, hipEvent_t *phEvent);
+int hc_hipEventCreateWithFlags  (void *hashcat_ctx, hipEvent_t *phEvent, unsigned int Flags);
+int hc_hipEventDestroy          (void *hashcat_ctx, hipEvent_t hEvent);
+int hc_hipEventElapsedTime      (void *hashcat_ctx, float *pMilliseconds, hipEvent_t hStart, hipEvent_t hEnd);
+int hc_hipEventQuery            (void *hashcat_ctx, hipEvent_t hEvent);
+int hc_hipEventRecord           (void *hashcat_ctx, hipEvent_t hEvent, hipStream_t hStream);
+int hc_hipEventSynchronize      (void *hashcat_ctx, hipEvent_t hEvent);
+int hc_hipFuncGetAttribute      (void *hashcat_ctx, int *pi, hipFunction_attribute attrib, hipFunction_t hfunc);
+int hc_hipInit                  (void *hashcat_ctx, unsigned int Flags);
+int hc_hipLaunchKernel          (void *hashcat_ctx, hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, hipStream_t hStream, void **kernelParams, void **extra);
+int hc_hipMemAlloc              (void *hashcat_ctx, hipDeviceptr_t *dptr, size_t bytesize);
+int hc_hipMemFree               (void *hashcat_ctx, hipDeviceptr_t dptr);
+int hc_hipMemGetInfo            (void *hashcat_ctx, size_t *free, size_t *total);
+int hc_hipMemcpyDtoD            (void *hashcat_ctx, hipDeviceptr_t dstDevice, hipDeviceptr_t srcDevice, size_t ByteCount);
+int hc_hipMemcpyDtoH            (void *hashcat_ctx, void *dstHost, hipDeviceptr_t srcDevice, size_t ByteCount);
+int hc_hipMemcpyHtoD            (void *hashcat_ctx, hipDeviceptr_t dstDevice, const void *srcHost, size_t ByteCount);
+int hc_hipMemsetD32             (void *hashcat_ctx, hipDeviceptr_t dstDevice, unsigned int ui, size_t N);
+int hc_hipMemsetD8              (void *hashcat_ctx, hipDeviceptr_t dstDevice, unsigned char uc, size_t N);
+int hc_hipMemcpyDtoDAsync       (void *hashcat_ctx, hipDeviceptr_t dstDevice, hipDeviceptr_t srcDevice, size_t ByteCount, hipStream_t hStream);
+int hc_hipMemcpyDtoHAsync       (void *hashcat_ctx, void *dstHost, hipDeviceptr_t srcDevice, size_t ByteCount, hipStream_t hStream);
+int hc_hipMemcpyHtoDAsync       (void *hashcat_ctx, hipDeviceptr_t dstDevice, const void *srcHost, size_t ByteCount, hipStream_t hStream);
+int hc_hipMemsetD32Async        (void *hashcat_ctx, hipDeviceptr_t dstDevice, unsigned int ui, size_t N, hipStream_t hStream);
+int hc_hipMemsetD8Async         (void *hashcat_ctx, hipDeviceptr_t dstDevice, unsigned char uc, size_t N, hipStream_t hStream);
+int hc_hipModuleGetFunction     (void *hashcat_ctx, hipFunction_t *hfunc, hipModule_t hmod, const char *name);
+int hc_hipModuleGetGlobal       (void *hashcat_ctx, hipDeviceptr_t *dptr, size_t *bytes, hipModule_t hmod, const char *name);
+int hc_hipModuleLoadDataEx      (void *hashcat_ctx, hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues);
+int hc_hipModuleUnload          (void *hashcat_ctx, hipModule_t hmod);
+int hc_hipRuntimeGetVersion     (void *hashcat_ctx, int *runtimeVersion);
+int hc_hipSetDevice             (void *hashcat_ctx, hipDevice_t dev);
+int hc_hipSetDeviceFlags        (void *hashcat_ctx, unsigned int flags);
+int hc_hipStreamCreate          (void *hashcat_ctx, hipStream_t *phStream);
+int hc_hipStreamCreateWithFlags (void *hashcat_ctx, hipStream_t *phStream, unsigned int flags);
+int hc_hipStreamDestroy         (void *hashcat_ctx, hipStream_t hStream);
+int hc_hipStreamSynchronize     (void *hashcat_ctx, hipStream_t hStream);
+int hc_hipGetDeviceProperties   (void *hashcat_ctx, hipDeviceProp_t *prop, hipDevice_t dev);
 int hc_hipModuleOccupancyMaxActiveBlocksPerMultiprocessor (void *hashcat_ctx, int *numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk);
 
 #endif // HC_EXT_HIP_H
diff --git a/src/autotune.c b/src/autotune.c
index 7938fb259..e8f704599 100644
--- a/src/autotune.c
+++ b/src/autotune.c
@@ -679,7 +679,7 @@ HC_API_CALL void *thread_autotune (void *p)
 
   if (device_param->is_hip == true)
   {
-    if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return NULL;
+    if (hc_hipSetDevice (hashcat_ctx, device_param->hip_device) == -1) return NULL;
   }
 
   // check for autotune failure
@@ -695,11 +695,6 @@ HC_API_CALL void *thread_autotune (void *p)
     if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return NULL;
   }
 
-  if (device_param->is_hip == true)
-  {
-    if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL;
-  }
-
   return NULL;
 }
 
diff --git a/src/backend.c b/src/backend.c
index 6e8fe4aad..6e84eed34 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -993,7 +993,7 @@ int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, c
 
   if (device_param->is_hip == true)
   {
-    if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return -1;
+    if (hc_hipSetDevice (hashcat_ctx, device_param->hip_device) == -1) return -1;
 
     if (hc_hipMemcpyDtoH (hashcat_ctx, &pw_idx, device_param->hip_d_pws_idx + (gidd * sizeof (pw_idx_t)), sizeof (pw_idx_t)) == -1) return -1;
 
@@ -1059,11 +1059,6 @@ int gidd_to_pw_t (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, c
     if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return -1;
   }
 
-  if (device_param->is_hip == true)
-  {
-    if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return -1;
-  }
-
   return 0;
 }
 
@@ -1082,13 +1077,11 @@ int copy_pws_idx (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, u
 
   if (device_param->is_hip == true)
   {
-    if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return -1;
+    if (hc_hipSetDevice (hashcat_ctx, device_param->hip_device) == -1) return -1;
 
     if (hc_hipMemcpyDtoH (hashcat_ctx, dest, device_param->hip_d_pws_idx + (gidd * sizeof (pw_idx_t)), (cnt * sizeof (pw_idx_t))) == -1) return -1;
 
     if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
-
-    if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return -1;
   }
 
   #if defined (__APPLE__)
@@ -1122,13 +1115,11 @@ int copy_pws_comp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
 
   if (device_param->is_hip == true)
   {
-    if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return -1;
+    if (hc_hipSetDevice (hashcat_ctx, device_param->hip_device) == -1) return -1;
 
     if (hc_hipMemcpyDtoH (hashcat_ctx, dest, device_param->hip_d_pws_comp_buf + (off * sizeof (u32)), cnt * sizeof (u32)) == -1) return -1;
 
     if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
-
-    if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return -1;
   }
 
   #if defined (__APPLE__)
@@ -5937,24 +5928,24 @@ static void backend_ctx_devices_init_cuda (hashcat_ctx_t *hashcat_ctx, int *virt
       device_param->has_prmt  = (sm >= 20) ? true : false;
       device_param->has_shfw  = (sm >= 70) ? true : false;
 
+      // one-time init cuda context
+
+      if (hc_cuCtxCreate (hashcat_ctx, &device_param->cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, device_param->cuda_device) == -1)
+      {
+        device_param->skipped = true;
+
+        continue;
+      }
+
+      if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1)
+      {
+        device_param->skipped = true;
+
+        continue;
+      }
+
       // device_available_mem
 
-      CUcontext cuda_context;
-
-      if (hc_cuCtxCreate (hashcat_ctx, &cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, device_param->cuda_device) == -1)
-      {
-        device_param->skipped = true;
-
-        continue;
-      }
-
-      if (hc_cuCtxPushCurrent (hashcat_ctx, cuda_context) == -1)
-      {
-        device_param->skipped = true;
-
-        continue;
-      }
-
       size_t free  = 0;
       size_t total = 0;
 
@@ -5967,14 +5958,7 @@ static void backend_ctx_devices_init_cuda (hashcat_ctx_t *hashcat_ctx, int *virt
 
       device_param->device_available_mem = ((u64) free * (100 - user_options->backend_devices_keepfree)) / 100;
 
-      if (hc_cuCtxPopCurrent (hashcat_ctx, &cuda_context) == -1)
-      {
-        device_param->skipped = true;
-
-        continue;
-      }
-
-      if (hc_cuCtxDestroy (hashcat_ctx, cuda_context) == -1)
+      if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1)
       {
         device_param->skipped = true;
 
@@ -6440,24 +6424,24 @@ static void backend_ctx_devices_init_hip (hashcat_ctx_t *hashcat_ctx, int *virth
       device_param->has_prmt  = false;
       device_param->has_shfw  = true; // always reports false : prop.arch.hasFunnelShift;
 
+      // one-time init hip context
+
+      if (hc_hipSetDeviceFlags (hashcat_ctx, hipDeviceScheduleBlockingSync) == -1)
+      {
+        device_param->skipped = true;
+
+        continue;
+      }
+
+      if (hc_hipSetDevice (hashcat_ctx, device_param->hip_device) == -1)
+      {
+        device_param->skipped = true;
+
+        continue;
+      }
+
       // device_available_mem
 
-      hipCtx_t hip_context;
-
-      if (hc_hipCtxCreate (hashcat_ctx, &hip_context, hipDeviceScheduleBlockingSync, device_param->hip_device) == -1)
-      {
-        device_param->skipped = true;
-
-        continue;
-      }
-
-      if (hc_hipCtxPushCurrent (hashcat_ctx, hip_context) == -1)
-      {
-        device_param->skipped = true;
-
-        continue;
-      }
-
       size_t free  = 0;
       size_t total = 0;
 
@@ -6470,20 +6454,6 @@ static void backend_ctx_devices_init_hip (hashcat_ctx_t *hashcat_ctx, int *virth
 
       device_param->device_available_mem = ((u64) free * (100 - user_options->backend_devices_keepfree)) / 100;
 
-      if (hc_hipCtxPopCurrent (hashcat_ctx, &hip_context) == -1)
-      {
-        device_param->skipped = true;
-
-        continue;
-      }
-
-      if (hc_hipCtxDestroy (hashcat_ctx, hip_context) == -1)
-      {
-        device_param->skipped = true;
-
-        continue;
-      }
-
       #if defined (__linux__)
       if (strchr (folder_config->cpath_real, ' ') != NULL)
       {
@@ -8623,13 +8593,9 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
         if (device_param->skipped == true) continue;
       }
 
-      /**
-       * create command-queue
-       */
+      // one-time init metal command-queue
 
-      mtl_command_queue command_queue;
-
-      if (hc_mtlCreateCommandQueue (hashcat_ctx, device_param->metal_device, &command_queue) == -1)
+      if (hc_mtlCreateCommandQueue (hashcat_ctx, device_param->metal_device, &device_param->metal_command_queue) == -1)
       {
         device_param->skipped = true;
 
@@ -8678,11 +8644,11 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
           u8 tmp_host[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
 
-          if (hc_mtlMemcpyHtoD (hashcat_ctx, command_queue, tmp_device[c], 0, tmp_host, sizeof (tmp_host)) == -1) break;
-          if (hc_mtlMemcpyDtoH (hashcat_ctx, command_queue, tmp_host, tmp_device[c], 0, sizeof (tmp_host)) == -1) break;
+          if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, tmp_device[c], 0, tmp_host, sizeof (tmp_host)) == -1) break;
+          if (hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, tmp_host, tmp_device[c], 0, sizeof (tmp_host)) == -1) break;
 
-          if (hc_mtlMemcpyHtoD (hashcat_ctx, command_queue, tmp_device[c], MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), tmp_host, sizeof (tmp_host)) == -1) break;
-          if (hc_mtlMemcpyDtoH (hashcat_ctx, command_queue, tmp_host, tmp_device[c], MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host)) == -1) break;
+          if (hc_mtlMemcpyHtoD (hashcat_ctx, device_param->metal_command_queue, tmp_device[c], MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), tmp_host, sizeof (tmp_host)) == -1) break;
+          if (hc_mtlMemcpyDtoH (hashcat_ctx, device_param->metal_command_queue, tmp_host, tmp_device[c], MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host)) == -1) break;
         }
 
         device_param->device_available_mem = MAX_ALLOC_CHECKS_SIZE;
@@ -8707,8 +8673,6 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
         hcfree (tmp_device);
       }
 
-      hc_mtlReleaseCommandQueue (hashcat_ctx, command_queue);
-
       if (device_param->device_host_unified_memory == 1)
       {
         // so, we actually have only half the memory because we need the same buffers on host side
@@ -8734,11 +8698,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
         if (device_param->skipped == true) continue;
       }
 
-      /**
-       * create context for each device
-       */
-
-      cl_context context;
+      // one-time init opencl context
 
       /*
       cl_context_properties properties[3];
@@ -8747,10 +8707,10 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
       properties[1] = (cl_context_properties) device_param->opencl_platform;
       properties[2] = 0;
 
-      CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->opencl_device, NULL, NULL, &context);
+      CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->opencl_device, NULL, NULL, &device_param->opencl_context);
       */
 
-      if (hc_clCreateContext (hashcat_ctx, NULL, 1, &device_param->opencl_device, NULL, NULL, &context) == -1)
+      if (hc_clCreateContext (hashcat_ctx, NULL, 1, &device_param->opencl_device, NULL, NULL, &device_param->opencl_context) == -1)
       {
         device_param->skipped = true;
 
@@ -8760,13 +8720,9 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
         continue;
       }
 
-      /**
-       * create command-queue
-       */
+      // one-time init opencl command-queue
 
-      cl_command_queue command_queue;
-
-      if (hc_clCreateCommandQueue (hashcat_ctx, context, device_param->opencl_device, 0, &command_queue) == -1)
+      if (hc_clCreateCommandQueue (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, CL_QUEUE_PROFILING_ENABLE, &device_param->opencl_command_queue) == -1)
       {
         device_param->skipped = true;
 
@@ -8781,17 +8737,17 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
       if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD))
       {
         #define RUN_INSTRUCTION_CHECKS() \
-          device_param->has_vadd     = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD_U32     %0, vcc, 0, 0;\"      : \"=v\"(r1)); }"); \
-          device_param->has_vaddc    = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADDC_U32    %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \
-          device_param->has_vadd_co  = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD_CO_U32  %0, vcc, 0, 0;\"      : \"=v\"(r1)); }"); \
-          device_param->has_vaddc_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADDC_CO_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \
-          device_param->has_vsub     = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUB_U32     %0, vcc, 0, 0;\"      : \"=v\"(r1)); }"); \
-          device_param->has_vsubb    = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUBB_U32    %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \
-          device_param->has_vsub_co  = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUB_CO_U32  %0, vcc, 0, 0;\"      : \"=v\"(r1)); }"); \
-          device_param->has_vsubb_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUBB_CO_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \
-          device_param->has_vadd3    = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD3_U32    %0,   0, 0, 0;\"      : \"=v\"(r1)); }"); \
-          device_param->has_vbfe     = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_BFE_U32     %0,   0, 0, 0;\"      : \"=v\"(r1)); }"); \
-          device_param->has_vperm    = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_PERM_B32    %0,   0, 0, 0;\"      : \"=v\"(r1)); }"); \
+          device_param->has_vadd     = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD_U32     %0, vcc, 0, 0;\"      : \"=v\"(r1)); }"); \
+          device_param->has_vaddc    = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADDC_U32    %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \
+          device_param->has_vadd_co  = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD_CO_U32  %0, vcc, 0, 0;\"      : \"=v\"(r1)); }"); \
+          device_param->has_vaddc_co = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADDC_CO_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \
+          device_param->has_vsub     = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUB_U32     %0, vcc, 0, 0;\"      : \"=v\"(r1)); }"); \
+          device_param->has_vsubb    = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUBB_U32    %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \
+          device_param->has_vsub_co  = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUB_CO_U32  %0, vcc, 0, 0;\"      : \"=v\"(r1)); }"); \
+          device_param->has_vsubb_co = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUBB_CO_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \
+          device_param->has_vadd3    = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD3_U32    %0,   0, 0, 0;\"      : \"=v\"(r1)); }"); \
+          device_param->has_vbfe     = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_BFE_U32     %0,   0, 0, 0;\"      : \"=v\"(r1)); }"); \
+          device_param->has_vperm    = opencl_test_instruction (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_PERM_B32    %0,   0, 0, 0;\"      : \"=v\"(r1)); }"); \
 
         if (backend_devices_idx > 0)
         {
@@ -8979,7 +8935,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
               OCL_PTR *ocl = (OCL_PTR *) backend_ctx->ocl;
 
-              tmp_device[c] = ocl->clCreateBuffer (context, CL_MEM_READ_WRITE, MAX_ALLOC_CHECKS_SIZE, NULL, &CL_err);
+              tmp_device[c] = ocl->clCreateBuffer (device_param->opencl_context, CL_MEM_READ_WRITE, MAX_ALLOC_CHECKS_SIZE, NULL, &CL_err);
 
               if (CL_err != CL_SUCCESS)
               {
@@ -8992,11 +8948,11 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
               u8 tmp_host[8];
 
-              if (ocl->clEnqueueReadBuffer  (command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
-              if (ocl->clEnqueueWriteBuffer (command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
+              if (ocl->clEnqueueReadBuffer  (device_param->opencl_command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
+              if (ocl->clEnqueueWriteBuffer (device_param->opencl_command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
 
-              if (ocl->clEnqueueReadBuffer  (command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
-              if (ocl->clEnqueueWriteBuffer (command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
+              if (ocl->clEnqueueReadBuffer  (device_param->opencl_command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
+              if (ocl->clEnqueueWriteBuffer (device_param->opencl_command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
             }
 
             device_param->device_available_mem = MAX_ALLOC_CHECKS_SIZE;
@@ -9008,24 +8964,28 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
             // clean up
 
+            int r = 0;
+
             for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++)
             {
               if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break;
 
               if (tmp_device[c] != NULL)
               {
-                if (hc_clReleaseMemObject (hashcat_ctx, tmp_device[c]) == -1) return -1;
+                if (hc_clReleaseMemObject (hashcat_ctx, tmp_device[c]) == -1) r = -1;
               }
             }
 
             hcfree (tmp_device);
+
+            if (r == -1)
+            {
+              // return -1 here is blocking, to be better evaluated
+              //return -1;
+            }
           }
         }
 
-        hc_clReleaseCommandQueue (hashcat_ctx, command_queue);
-
-        hc_clReleaseContext (hashcat_ctx, context);
-
         if (device_param->device_host_unified_memory == 1)
         {
           // so, we actually have only half the memory because we need the same buffers on host side
@@ -9065,23 +9025,61 @@ void backend_ctx_devices_destroy (hashcat_ctx_t *hashcat_ctx)
     hcfree (backend_ctx->opencl_platforms_version[opencl_platforms_idx]);
   }
 
+  // one-time release context/command-queue from all runtimes
+
   for (int backend_devices_idx = 0; backend_devices_idx < backend_ctx->backend_devices_cnt; backend_devices_idx++)
   {
     hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
 
     hcfree (device_param->device_name);
 
+    if (device_param->is_cuda == true)
+    {
+      if (device_param->cuda_context)
+      {
+        hc_cuCtxDestroy (hashcat_ctx, device_param->cuda_context);
+
+        device_param->cuda_context = NULL;
+      }
+    }
+
+    if (device_param->is_hip == true)
+    {
+      hcfree (device_param->gcnArchName);
+    }
+
+    #if defined (__APPLE__)
+    if (device_param->is_metal == true)
+    {
+      if (device_param->metal_command_queue)
+      {
+        hc_mtlReleaseCommandQueue (hashcat_ctx, device_param->metal_command_queue);
+
+        device_param->metal_command_queue = NULL;
+      }
+    }
+    #endif
+
     if (device_param->is_opencl == true)
     {
       hcfree (device_param->opencl_driver_version);
       hcfree (device_param->opencl_device_version);
       hcfree (device_param->opencl_device_c_version);
       hcfree (device_param->opencl_device_vendor);
-    }
 
-    if (device_param->is_hip == true)
-    {
-      hcfree (device_param->gcnArchName);
+      if (device_param->opencl_command_queue)
+      {
+        hc_clReleaseCommandQueue (hashcat_ctx, device_param->opencl_command_queue);
+
+        device_param->opencl_command_queue = NULL;
+      }
+
+      if (device_param->opencl_context)
+      {
+        hc_clReleaseContext (hashcat_ctx, device_param->opencl_context);
+
+        device_param->opencl_context = NULL;
+      }
     }
   }
 
@@ -10730,93 +10728,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
     }
     #endif
 
-    /**
-     * create context for each device
-     */
-
-    if (device_param->is_cuda == true)
-    {
-      if (hc_cuCtxCreate (hashcat_ctx, &device_param->cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, device_param->cuda_device) == -1)
-      {
-        device_param->skipped = true;
-
-        continue;
-      }
-
-      if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1)
-      {
-        device_param->skipped = true;
-
-        continue;
-      }
-    }
-
-    if (device_param->is_hip == true)
-    {
-      if (hc_hipCtxCreate (hashcat_ctx, &device_param->hip_context, hipDeviceScheduleBlockingSync, device_param->hip_device) == -1)
-      {
-        device_param->skipped = true;
-
-        continue;
-      }
-
-      if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1)
-      {
-        device_param->skipped = true;
-
-        continue;
-      }
-    }
-
-    #if defined (__APPLE__)
-    if (device_param->is_metal == true)
-    {
-      /**
-       * create command-queue
-       */
-
-      if (hc_mtlCreateCommandQueue (hashcat_ctx, device_param->metal_device, &device_param->metal_command_queue) == -1)
-      {
-        device_param->skipped = true;
-
-        continue;
-      }
-    }
-    #endif
-
-    if (device_param->is_opencl == true)
-    {
-      /*
-      cl_context_properties properties[3];
-
-      properties[0] = CL_CONTEXT_PLATFORM;
-      properties[1] = (cl_context_properties) device_param->opencl_platform;
-      properties[2] = 0;
-
-      CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->opencl_device, NULL, NULL, &device_param->opencl_context);
-      */
-
-      if (hc_clCreateContext (hashcat_ctx, NULL, 1, &device_param->opencl_device, NULL, NULL, &device_param->opencl_context) == -1)
-      {
-        device_param->skipped = true;
-
-        continue;
-      }
-
-      /**
-       * create command-queue
-       */
-
-      // not supported with NV
-      // device_param->opencl_command_queue = hc_clCreateCommandQueueWithProperties (hashcat_ctx, device_param->opencl_device, NULL);
-
-      if (hc_clCreateCommandQueue (hashcat_ctx, device_param->opencl_context, device_param->opencl_device, CL_QUEUE_PROFILING_ENABLE, &device_param->opencl_command_queue) == -1)
-      {
-        device_param->skipped = true;
-
-        continue;
-      }
-    }
+    // re-using context/command-queue, there is no need to re-initialize them
 
     /**
      * create stream for CUDA devices
@@ -10824,6 +10736,13 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     if (device_param->is_cuda == true)
     {
+      if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1)
+      {
+        device_param->skipped = true;
+
+        continue;
+      }
+
       if (hc_cuStreamCreate (hashcat_ctx, &device_param->cuda_stream, CU_STREAM_DEFAULT) == -1)
       {
         device_param->skipped = true;
@@ -10838,7 +10757,14 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     if (device_param->is_hip == true)
     {
-      if (hc_hipStreamCreate (hashcat_ctx, &device_param->hip_stream, hipStreamDefault) == -1)
+      if (hc_hipSetDevice (hashcat_ctx, device_param->hip_device) == -1)
+      {
+        device_param->skipped = true;
+
+        continue;
+      }
+
+      if (hc_hipStreamCreateWithFlags (hashcat_ctx, &device_param->hip_stream, hipStreamDefault) == -1)
       {
         device_param->skipped = true;
 
@@ -10880,21 +10806,21 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
 
     if (device_param->is_hip == true)
     {
-      if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event1, hipEventBlockingSync) == -1)
+      if (hc_hipEventCreateWithFlags (hashcat_ctx, &device_param->hip_event1, hipEventBlockingSync) == -1)
       {
         device_param->skipped = true;
 
         continue;
       }
 
-      if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event2, hipEventBlockingSync) == -1)
+      if (hc_hipEventCreateWithFlags (hashcat_ctx, &device_param->hip_event2, hipEventBlockingSync) == -1)
       {
         device_param->skipped = true;
 
         continue;
       }
 
-      if (hc_hipEventCreate (hashcat_ctx, &device_param->hip_event3, hipEventDisableTiming) == -1)
+      if (hc_hipEventCreateWithFlags (hashcat_ctx, &device_param->hip_event3, hipEventDisableTiming) == -1)
       {
         device_param->skipped = true;
 
@@ -16927,16 +16853,6 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       }
     }
 
-    if (device_param->is_hip == true)
-    {
-      if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1)
-      {
-        device_param->skipped = true;
-
-        continue;
-      }
-    }
-
     hardware_power_all += hardware_power_max;
 
     EVENT_DATA (EVENT_BACKEND_DEVICE_INIT_POST, &backend_devices_idx, sizeof (int));
@@ -17058,7 +16974,7 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
       if (device_param->cuda_module_amp)       hc_cuModuleUnload (hashcat_ctx, device_param->cuda_module_amp);
       if (device_param->cuda_module_shared)    hc_cuModuleUnload (hashcat_ctx, device_param->cuda_module_shared);
 
-      if (device_param->cuda_context)          hc_cuCtxDestroy (hashcat_ctx, device_param->cuda_context);
+      //if (device_param->cuda_context)          hc_cuCtxDestroy (hashcat_ctx, device_param->cuda_context);
 
       device_param->cuda_d_pws_buf            = 0;
       device_param->cuda_d_pws_amp_buf        = 0;
@@ -17135,7 +17051,7 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
       device_param->cuda_module_amp           = NULL;
       device_param->cuda_module_shared        = NULL;
 
-      device_param->cuda_context              = NULL;
+      //device_param->cuda_context              = NULL;
     }
 
     if (device_param->is_hip == true)
@@ -17189,8 +17105,6 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
       if (device_param->hip_module_amp)       hc_hipModuleUnload (hashcat_ctx, device_param->hip_module_amp);
       if (device_param->hip_module_shared)    hc_hipModuleUnload (hashcat_ctx, device_param->hip_module_shared);
 
-      if (device_param->hip_context)          hc_hipCtxDestroy (hashcat_ctx, device_param->hip_context);
-
       device_param->hip_d_pws_buf            = 0;
       device_param->hip_d_pws_amp_buf        = 0;
       device_param->hip_d_pws_comp_buf       = 0;
@@ -17265,8 +17179,6 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
       device_param->hip_module_mp            = NULL;
       device_param->hip_module_amp           = NULL;
       device_param->hip_module_shared        = NULL;
-
-      device_param->hip_context              = NULL;
     }
 
     #if defined (__APPLE__)
@@ -17341,7 +17253,7 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
       if (device_param->metal_library_amp)            hc_mtlReleaseLibrary (hashcat_ctx, device_param->metal_library_amp);
       if (device_param->metal_library_shared)         hc_mtlReleaseLibrary (hashcat_ctx, device_param->metal_library_shared);
 
-      if (device_param->metal_command_queue)          hc_mtlReleaseCommandQueue (hashcat_ctx, device_param->metal_command_queue);
+      //if (device_param->metal_command_queue)          hc_mtlReleaseCommandQueue (hashcat_ctx, device_param->metal_command_queue);
 
       //if (device_param->metal_device)                 hc_mtlReleaseDevice (hashcat_ctx, device_param->metal_device);
 
@@ -17411,7 +17323,7 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
       device_param->metal_library_mp             = NULL;
       device_param->metal_library_amp            = NULL;
       device_param->metal_library_shared         = NULL;
-      device_param->metal_command_queue          = NULL;
+      //device_param->metal_command_queue          = NULL;
       //device_param->metal_device                 = NULL;
     }
     #endif // __APPLE__
@@ -17487,9 +17399,9 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
       if (device_param->opencl_program_amp)      hc_clReleaseProgram (hashcat_ctx, device_param->opencl_program_amp);
       if (device_param->opencl_program_shared)   hc_clReleaseProgram (hashcat_ctx, device_param->opencl_program_shared);
 
-      if (device_param->opencl_command_queue)    hc_clReleaseCommandQueue (hashcat_ctx, device_param->opencl_command_queue);
+      //if (device_param->opencl_command_queue)    hc_clReleaseCommandQueue (hashcat_ctx, device_param->opencl_command_queue);
 
-      if (device_param->opencl_context)          hc_clReleaseContext (hashcat_ctx, device_param->opencl_context);
+      //if (device_param->opencl_context)          hc_clReleaseContext (hashcat_ctx, device_param->opencl_context);
 
       device_param->opencl_d_pws_buf           = NULL;
       device_param->opencl_d_pws_amp_buf       = NULL;
@@ -17557,8 +17469,8 @@ void backend_session_destroy (hashcat_ctx_t *hashcat_ctx)
       device_param->opencl_program_mp          = NULL;
       device_param->opencl_program_amp         = NULL;
       device_param->opencl_program_shared      = NULL;
-      device_param->opencl_command_queue       = NULL;
-      device_param->opencl_context             = NULL;
+      //device_param->opencl_command_queue       = NULL;
+      //device_param->opencl_context             = NULL;
     }
 
     device_param->h_tmps              = NULL;
diff --git a/src/dispatch.c b/src/dispatch.c
index ad7c9851b..9ac171f31 100644
--- a/src/dispatch.c
+++ b/src/dispatch.c
@@ -381,7 +381,7 @@ HC_API_CALL void *thread_calc_stdin (void *p)
 
   if (device_param->is_hip == true)
   {
-    if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return NULL;
+    if (hc_hipSetDevice (hashcat_ctx, device_param->hip_device) == -1) return NULL;
   }
 
   if (calc_stdin (hashcat_ctx, device_param) == -1)
@@ -396,11 +396,6 @@ HC_API_CALL void *thread_calc_stdin (void *p)
     if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return NULL;
   }
 
-  if (device_param->is_hip == true)
-  {
-    if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL;
-  }
-
   if (bridge_ctx->enabled == true)
   {
     if (bridge_ctx->thread_term != BRIDGE_DEFAULT)
@@ -1685,7 +1680,7 @@ HC_API_CALL void *thread_calc (void *p)
 
   if (device_param->is_hip == true)
   {
-    if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return NULL;
+    if (hc_hipSetDevice (hashcat_ctx, device_param->hip_device) == -1) return NULL;
   }
 
   if (calc (hashcat_ctx, device_param) == -1)
@@ -1700,11 +1695,6 @@ HC_API_CALL void *thread_calc (void *p)
     if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return NULL;
   }
 
-  if (device_param->is_hip == true)
-  {
-    if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL;
-  }
-
   if (bridge_ctx->enabled == true)
   {
     if (bridge_ctx->thread_term != BRIDGE_DEFAULT)
diff --git a/src/ext_hip.c b/src/ext_hip.c
index 4a26a7e9f..9e7eb00ab 100644
--- a/src/ext_hip.c
+++ b/src/ext_hip.c
@@ -115,51 +115,55 @@ int hip_init (void *hashcat_ctx)
   // a good reference is cuda.h itself
   // this needs to be verified for each new cuda release
 
-  HC_LOAD_FUNC_HIP (hip, hipCtxCreate,              hipCtxCreate,               HIP_HIPCTXCREATE,               HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipCtxDestroy,             hipCtxDestroy,              HIP_HIPCTXDESTROY,              HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipCtxPopCurrent,          hipCtxPopCurrent,           HIP_HIPCTXPOPCURRENT,           HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipCtxPushCurrent,         hipCtxPushCurrent,          HIP_HIPCTXPUSHCURRENT,          HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipCtxSetCurrent,          hipCtxSetCurrent,           HIP_HIPCTXSETCURRENT,           HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipCtxSynchronize,         hipCtxSynchronize,          HIP_HIPCTXSYNCHRONIZE,          HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipDeviceGet,              hipDeviceGet,               HIP_HIPDEVICEGET,               HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipDeviceGetAttribute,     hipDeviceGetAttribute,      HIP_HIPDEVICEGETATTRIBUTE,      HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipDeviceGetCount,         hipGetDeviceCount,          HIP_HIPDEVICEGETCOUNT,          HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipDeviceGetName,          hipDeviceGetName,           HIP_HIPDEVICEGETNAME,           HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipDeviceTotalMem,         hipDeviceTotalMem,          HIP_HIPDEVICETOTALMEM,          HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipDriverGetVersion,       hipDriverGetVersion,        HIP_HIPDRIVERGETVERSION,        HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipEventCreate,            hipEventCreateWithFlags,    HIP_HIPEVENTCREATE,             HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipEventDestroy,           hipEventDestroy,            HIP_HIPEVENTDESTROY,            HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipEventElapsedTime,       hipEventElapsedTime,        HIP_HIPEVENTELAPSEDTIME,        HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipEventRecord,            hipEventRecord,             HIP_HIPEVENTRECORD,             HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipEventSynchronize,       hipEventSynchronize,        HIP_HIPEVENTSYNCHRONIZE,        HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipFuncGetAttribute,       hipFuncGetAttribute,        HIP_HIPFUNCGETATTRIBUTE,        HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipGetErrorName,           hipDrvGetErrorName,         HIP_HIPGETERRORNAME,            HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipGetErrorString,         hipDrvGetErrorString,       HIP_HIPGETERRORSTRING,          HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipInit,                   hipInit,                    HIP_HIPINIT,                    HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipLaunchKernel,           hipModuleLaunchKernel,      HIP_HIPLAUNCHKERNEL,            HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipMemAlloc,               hipMalloc,                  HIP_HIPMEMALLOC,                HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipMemFree,                hipFree,                    HIP_HIPMEMFREE,                 HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipMemGetInfo,             hipMemGetInfo,              HIP_HIPMEMGETINFO,              HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoD,             hipMemcpyDtoD,              HIP_HIPMEMCPYDTOD,              HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoH,             hipMemcpyDtoH,              HIP_HIPMEMCPYDTOH,              HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipMemcpyHtoD,             hipMemcpyHtoD,              HIP_HIPMEMCPYHTOD,              HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipMemsetD32,              hipMemsetD32,               HIP_HIPMEMSETD32,               HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipMemsetD8,               hipMemsetD8,                HIP_HIPMEMSETD8,                HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoDAsync,        hipMemcpyDtoDAsync,         HIP_HIPMEMCPYDTODASYNC,         HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoHAsync,        hipMemcpyDtoHAsync,         HIP_HIPMEMCPYDTOHASYNC,         HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipMemcpyHtoDAsync,        hipMemcpyHtoDAsync,         HIP_HIPMEMCPYHTODASYNC,         HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipMemsetD32Async,         hipMemsetD32Async,          HIP_HIPMEMSETD32ASYNC,          HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipMemsetD8Async,          hipMemsetD8Async,           HIP_HIPMEMSETD8ASYNC,           HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipModuleGetFunction,      hipModuleGetFunction,       HIP_HIPMODULEGETFUNCTION,       HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipModuleGetGlobal,        hipModuleGetGlobal,         HIP_HIPMODULEGETGLOBAL,         HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipModuleLoadDataEx,       hipModuleLoadDataEx,        HIP_HIPMODULELOADDATAEX,        HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipModuleUnload,           hipModuleUnload,            HIP_HIPMODULEUNLOAD,            HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipRuntimeGetVersion,      hipRuntimeGetVersion,       HIP_HIPRUNTIMEGETVERSION,       HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipStreamCreate,           hipStreamCreate,            HIP_HIPSTREAMCREATE,            HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipStreamDestroy,          hipStreamDestroy,           HIP_HIPSTREAMDESTROY,           HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipStreamSynchronize,      hipStreamSynchronize,       HIP_HIPSTREAMSYNCHRONIZE,       HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipGetDeviceProperties,    hipGetDevicePropertiesR0600,     HIP_HIPGETDEVICEPROPERTIES,     HIP, 1);
-  HC_LOAD_FUNC_HIP (hip, hipModuleOccupancyMaxActiveBlocksPerMultiprocessor,    hipModuleOccupancyMaxActiveBlocksPerMultiprocessor,     HIP_HIPMODULEOCCUPANCYMAXACTIVEBLOCKSPERMULTIPROCESSOR,     HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipCtxCreate,              hipCtxCreate,                 HIP_HIPCTXCREATE,               HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipCtxDestroy,             hipCtxDestroy,                HIP_HIPCTXDESTROY,              HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipCtxPopCurrent,          hipCtxPopCurrent,             HIP_HIPCTXPOPCURRENT,           HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipCtxPushCurrent,         hipCtxPushCurrent,            HIP_HIPCTXPUSHCURRENT,          HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipCtxSetCurrent,          hipCtxSetCurrent,             HIP_HIPCTXSETCURRENT,           HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipCtxSynchronize,         hipCtxSynchronize,            HIP_HIPCTXSYNCHRONIZE,          HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipDeviceGet,              hipDeviceGet,                 HIP_HIPDEVICEGET,               HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipDeviceGetAttribute,     hipDeviceGetAttribute,        HIP_HIPDEVICEGETATTRIBUTE,      HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipDeviceGetCount,         hipGetDeviceCount,            HIP_HIPDEVICEGETCOUNT,          HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipDeviceGetName,          hipDeviceGetName,             HIP_HIPDEVICEGETNAME,           HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipDeviceTotalMem,         hipDeviceTotalMem,            HIP_HIPDEVICETOTALMEM,          HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipDriverGetVersion,       hipDriverGetVersion,          HIP_HIPDRIVERGETVERSION,        HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipEventCreate,            hipEventCreate,               HIP_HIPEVENTCREATE,             HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipEventCreateWithFlags,   hipEventCreateWithFlags,      HIP_HIPEVENTCREATEWITHFLAGS,    HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipEventDestroy,           hipEventDestroy,              HIP_HIPEVENTDESTROY,            HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipEventElapsedTime,       hipEventElapsedTime,          HIP_HIPEVENTELAPSEDTIME,        HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipEventRecord,            hipEventRecord,               HIP_HIPEVENTRECORD,             HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipEventSynchronize,       hipEventSynchronize,          HIP_HIPEVENTSYNCHRONIZE,        HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipFuncGetAttribute,       hipFuncGetAttribute,          HIP_HIPFUNCGETATTRIBUTE,        HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipGetErrorName,           hipDrvGetErrorName,           HIP_HIPGETERRORNAME,            HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipGetErrorString,         hipDrvGetErrorString,         HIP_HIPGETERRORSTRING,          HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipInit,                   hipInit,                      HIP_HIPINIT,                    HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipLaunchKernel,           hipModuleLaunchKernel,        HIP_HIPLAUNCHKERNEL,            HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipMemAlloc,               hipMalloc,                    HIP_HIPMEMALLOC,                HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipMemFree,                hipFree,                      HIP_HIPMEMFREE,                 HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipMemGetInfo,             hipMemGetInfo,                HIP_HIPMEMGETINFO,              HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoD,             hipMemcpyDtoD,                HIP_HIPMEMCPYDTOD,              HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoH,             hipMemcpyDtoH,                HIP_HIPMEMCPYDTOH,              HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipMemcpyHtoD,             hipMemcpyHtoD,                HIP_HIPMEMCPYHTOD,              HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipMemsetD32,              hipMemsetD32,                 HIP_HIPMEMSETD32,               HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipMemsetD8,               hipMemsetD8,                  HIP_HIPMEMSETD8,                HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoDAsync,        hipMemcpyDtoDAsync,           HIP_HIPMEMCPYDTODASYNC,         HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoHAsync,        hipMemcpyDtoHAsync,           HIP_HIPMEMCPYDTOHASYNC,         HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipMemcpyHtoDAsync,        hipMemcpyHtoDAsync,           HIP_HIPMEMCPYHTODASYNC,         HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipMemsetD32Async,         hipMemsetD32Async,            HIP_HIPMEMSETD32ASYNC,          HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipMemsetD8Async,          hipMemsetD8Async,             HIP_HIPMEMSETD8ASYNC,           HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipModuleGetFunction,      hipModuleGetFunction,         HIP_HIPMODULEGETFUNCTION,       HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipModuleGetGlobal,        hipModuleGetGlobal,           HIP_HIPMODULEGETGLOBAL,         HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipModuleLoadDataEx,       hipModuleLoadDataEx,          HIP_HIPMODULELOADDATAEX,        HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipModuleUnload,           hipModuleUnload,              HIP_HIPMODULEUNLOAD,            HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipRuntimeGetVersion,      hipRuntimeGetVersion,         HIP_HIPRUNTIMEGETVERSION,       HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipSetDevice,              hipSetDevice,                 HIP_HIPSETDEVICE,               HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipSetDeviceFlags,         hipSetDeviceFlags,            HIP_HIPSETDEVICEFLAGS,          HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipStreamCreate,           hipStreamCreate,              HIP_HIPSTREAMCREATE,            HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipStreamCreateWithFlags,  hipStreamCreateWithFlags,     HIP_HIPSTREAMCREATEWITHFLAGS,   HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipStreamDestroy,          hipStreamDestroy,             HIP_HIPSTREAMDESTROY,           HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipStreamSynchronize,      hipStreamSynchronize,         HIP_HIPSTREAMSYNCHRONIZE,       HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipGetDeviceProperties,    hipGetDevicePropertiesR0600,  HIP_HIPGETDEVICEPROPERTIES,     HIP, 1);
+  HC_LOAD_FUNC_HIP (hip, hipModuleOccupancyMaxActiveBlocksPerMultiprocessor, hipModuleOccupancyMaxActiveBlocksPerMultiprocessor, HIP_HIPMODULEOCCUPANCYMAXACTIVEBLOCKSPERMULTIPROCESSOR, HIP, 1);
 
   return 0;
 }
@@ -507,13 +511,13 @@ int hc_hipDriverGetVersion (void *hashcat_ctx, int *driverVersion)
   return 0;
 }
 
-int hc_hipEventCreate (void *hashcat_ctx, hipEvent_t *phEvent, unsigned int Flags)
+int hc_hipEventCreate (void *hashcat_ctx, hipEvent_t *phEvent)
 {
   backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const hipError_t HIP_err = hip->hipEventCreate (phEvent, Flags);
+  const hipError_t HIP_err = hip->hipEventCreate (phEvent);
 
   if (HIP_err != hipSuccess)
   {
@@ -534,6 +538,33 @@ int hc_hipEventCreate (void *hashcat_ctx, hipEvent_t *phEvent, unsigned int Flag
   return 0;
 }
 
+int hc_hipEventCreateWithFlags (void *hashcat_ctx, hipEvent_t *phEvent, unsigned int flags)
+{
+  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
+
+  HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
+
+  const hipError_t HIP_err = hip->hipEventCreateWithFlags (phEvent, flags);
+
+  if (HIP_err != hipSuccess)
+  {
+    const char *pStr = NULL;
+
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
+    {
+      event_log_error (hashcat_ctx, "hipEventCreateWithFlags(): %s", pStr);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "hipEventCreateWithFlags(): %d", HIP_err);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
 int hc_hipEventDestroy (void *hashcat_ctx, hipEvent_t hEvent)
 {
   backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
@@ -1211,13 +1242,67 @@ int hc_hipRuntimeGetVersion (void *hashcat_ctx, int *runtimeVersion)
   return 0;
 }
 
-int hc_hipStreamCreate (void *hashcat_ctx, hipStream_t *phStream, unsigned int Flags)
+int hc_hipSetDevice (void *hashcat_ctx, hipDevice_t dev)
 {
   backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
 
   HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
 
-  const hipError_t HIP_err = hip->hipStreamCreate (phStream, Flags);
+  const hipError_t HIP_err = hip->hipSetDevice (dev);
+
+  if (HIP_err != hipSuccess)
+  {
+    const char *pStr = NULL;
+
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
+    {
+      event_log_error (hashcat_ctx, "hipSetDevice(): %s", pStr);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "hipSetDevice(): %d", HIP_err);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
+int hc_hipSetDeviceFlags (void *hashcat_ctx, unsigned int flags)
+{
+  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
+
+  HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
+
+  const hipError_t HIP_err = hip->hipSetDeviceFlags (flags);
+
+  if (HIP_err != hipSuccess)
+  {
+    const char *pStr = NULL;
+
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
+    {
+      event_log_error (hashcat_ctx, "hipSetDeviceFlags(): %s", pStr);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "hipSetDeviceFlags(): %d", HIP_err);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
+int hc_hipStreamCreate (void *hashcat_ctx, hipStream_t *phStream)
+{
+  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
+
+  HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
+
+  const hipError_t HIP_err = hip->hipStreamCreate (phStream);
 
   if (HIP_err != hipSuccess)
   {
@@ -1238,6 +1323,33 @@ int hc_hipStreamCreate (void *hashcat_ctx, hipStream_t *phStream, unsigned int F
   return 0;
 }
 
+int hc_hipStreamCreateWithFlags (void *hashcat_ctx, hipStream_t *phStream, unsigned int Flags)
+{
+  backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
+
+  HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
+
+  const hipError_t HIP_err = hip->hipStreamCreateWithFlags (phStream, Flags);
+
+  if (HIP_err != hipSuccess)
+  {
+    const char *pStr = NULL;
+
+    if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
+    {
+      event_log_error (hashcat_ctx, "hipStreamCreateWithFlags(): %s", pStr);
+    }
+    else
+    {
+      event_log_error (hashcat_ctx, "hipStreamCreateWithFlags(): %d", HIP_err);
+    }
+
+    return -1;
+  }
+
+  return 0;
+}
+
 int hc_hipStreamDestroy (void *hashcat_ctx, hipStream_t hStream)
 {
   backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
diff --git a/src/selftest.c b/src/selftest.c
index 5e744f88b..6c63a3418 100644
--- a/src/selftest.c
+++ b/src/selftest.c
@@ -1272,7 +1272,7 @@ HC_API_CALL void *thread_selftest (void *p)
 
   if (device_param->is_hip == true)
   {
-    if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return NULL;
+    if (hc_hipSetDevice (hashcat_ctx, device_param->hip_device) == -1) return NULL;
   }
 
   const int rc_selftest = process_selftest (hashcat_ctx, device_param);
@@ -1303,8 +1303,6 @@ HC_API_CALL void *thread_selftest (void *p)
   if (device_param->is_hip == true)
   {
     if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return NULL;
-
-    if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL;
   }
 
   if (bridge_ctx->enabled == true)

From 17e29f298a96c35e151f17876ee9db9c7e52f6f0 Mon Sep 17 00:00:00 2001
From: Royce Williams <royce@techsolvency.com>
Date: Mon, 7 Jul 2025 10:44:59 -0800
Subject: [PATCH 74/83] clarify Kernel.Feature: password, min-max bytes

---
 src/terminal.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/terminal.c b/src/terminal.c
index 258d02f2e..470770609 100644
--- a/src/terminal.c
+++ b/src/terminal.c
@@ -2936,13 +2936,15 @@ void status_display (hashcat_ctx_t *hashcat_ctx)
   if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
   {
     event_log_info (hashcat_ctx,
-      "Kernel.Feature...: Optimized Kernel (max length: %u)",
+      "Kernel.Feature...: Optimized Kernel (password length %u-%u bytes)",
+      hashconfig->pw_min,
       hashconfig->pw_max);
   }
   else
   {
     event_log_info (hashcat_ctx,
-      "Kernel.Feature...: Pure Kernel (max length: %u)",
+      "Kernel.Feature...: Pure Kernel (password length %u-%u bytes)",
+      hashconfig->pw_min,
       hashconfig->pw_max);
   }
 

From ca1ebc23a4189e80cb33b6c89a4611e19c4ffd2a Mon Sep 17 00:00:00 2001
From: Royce Williams <royce@techsolvency.com>
Date: Mon, 7 Jul 2025 15:41:23 -0800
Subject: [PATCH 75/83] changes.txt for min/max password length display

---
 docs/changes.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/changes.txt b/docs/changes.txt
index 4161e3b9a..127cf6778 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -72,6 +72,7 @@
 - Added options --benchmark-min and --benchmark-max to set a hash-mode range to be used during the benchmark
 - Added option --total-candidates to provide the total candidate count for an attack insteda of the internal "--keyspace" value
 - Added option --backend-devices-keepfree to configure X percentage of device memory available to keep free
+- Added display of password length minimum and maximum in the Kernel.Feature status line
 
 ##
 ## Performance

From 09cc387bef6f74f5511d5ac29eb85c82e8ed4d99 Mon Sep 17 00:00:00 2001
From: oblivionsage <cookieandcream560@gmail.com>
Date: Tue, 8 Jul 2025 11:11:45 +0200
Subject: [PATCH 76/83] Fix terminal TODO: Implement Windows system info
 display

- Replace hardcoded 'N/A' values with actual Windows system information
- Add GetSystemInfo() for processor architecture detection
- Add GetVersionEx() for Windows version information
- Support both machine-readable and human-readable output formats
- Follow existing Linux uname() implementation pattern
- Maintain cross-platform compatibility

Resolves TODO comment in src/terminal.c line 1257
---
 src/terminal.c | 49 ++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 44 insertions(+), 5 deletions(-)

diff --git a/src/terminal.c b/src/terminal.c
index 342a0417f..dc1649eb5 100644
--- a/src/terminal.c
+++ b/src/terminal.c
@@ -1254,21 +1254,60 @@ void backend_info (hashcat_ctx_t *hashcat_ctx)
     }
 
     #if defined (_WIN) || defined (__CYGWIN__) || defined (__MSYS__)
-    // TODO
+    // Get Windows system information
+    SYSTEM_INFO sysinfo;
+    OSVERSIONINFO osvi;
+    char platform_buf[256] = "N/A";
+    char release_buf[256] = "N/A";
+    
+    GetSystemInfo (&sysinfo);
+    
+    // Initialize version info structure
+    ZeroMemory (&osvi, sizeof (OSVERSIONINFO));
+    osvi.dwOSVersionInfoSize = sizeof (OSVERSIONINFO);
+    
+    bool rc_version = (GetVersionEx (&osvi) != 0);
+    
+    // Get processor architecture string
+    switch (sysinfo.wProcessorArchitecture)
+    {
+      case PROCESSOR_ARCHITECTURE_AMD64:
+        snprintf (platform_buf, sizeof (platform_buf), "x86_64");
+        break;
+      case PROCESSOR_ARCHITECTURE_INTEL:
+        snprintf (platform_buf, sizeof (platform_buf), "x86");
+        break;
+      case PROCESSOR_ARCHITECTURE_ARM64:
+        snprintf (platform_buf, sizeof (platform_buf), "ARM64");
+        break;
+      case PROCESSOR_ARCHITECTURE_ARM:
+        snprintf (platform_buf, sizeof (platform_buf), "ARM");
+        break;
+      default:
+        snprintf (platform_buf, sizeof (platform_buf), "Unknown");
+    }
+    
+    // Get Windows version string
+    if (rc_version)
+    {
+      snprintf (release_buf, sizeof (release_buf), "%lu.%lu.%lu",
+               osvi.dwMajorVersion, osvi.dwMinorVersion, osvi.dwBuildNumber);
+    }
+    
     if (user_options->machine_readable == false)
     {
       event_log_info (hashcat_ctx, "OS.Name......: Windows");
-      event_log_info (hashcat_ctx, "OS.Release...: N/A");
-      event_log_info (hashcat_ctx, "HW.Platform..: N/A");
+      event_log_info (hashcat_ctx, "OS.Release...: %s", rc_version ? release_buf : "N/A");
+      event_log_info (hashcat_ctx, "HW.Platform..: %s", platform_buf);
       event_log_info (hashcat_ctx, "HW.Model.....: N/A");
     }
     else
     {
       printf ("\"OS\": { ");
       printf ("\"Name\": \"%s\", ", "Windows");
-      printf ("\"Release\": \"%s\" }, ", "N/A");
+      printf ("\"Release\": \"%s\" }, ", rc_version ? release_buf : "N/A");
       printf ("\"Hardware\": { ");
-      printf ("\"Platform\": \"%s\", ", "N/A");
+      printf ("\"Platform\": \"%s\", ", platform_buf);
       printf ("\"Model\": \"%s\" } ", "N/A");
       printf ("}, ");
     }

From b98d5d5f8a81230c712d6602ccbd84ba9f9bdad5 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Tue, 8 Jul 2025 13:21:10 +0200
Subject: [PATCH 77/83] Fixed out-of-boundary read for -a 9 when using the new
 OPTS_TYPE_THREAD_MULTI_DISABLE parameter. This only affected Argon2. Fixed
 compiler warnings in inc_hash_argon2.cl. Moved argon2_tmp_t and
 argon2_extra_t typedefs from argon2_common.c back to the module to allow
 plugin developers to modify them when using Argon2 as a primitive. Slightly
 improved autotune behavior for edge cases such as 8700 and 18600, where some
 algorithms started with a theoretical, excessively high value, leaving no room
 for proper tuning. Removed argon2_module_kernel_threads_min() and
 argon2_module_kernel_threads_max() from argon2_common.c. Switched to using
 OPTS_TYPE_NATIVE_THREADS instead. Plugin developers can still use it. This
 simplifies CPU integration, as CPUs typically run with a single thread.
 Updated plugins 15500 and 20510. Added a thread limit to prevent autotune
 from selecting an excessively high thread count. The issue originated from
 the runtime returning an unrealistically high ideal thread count.

---
 OpenCL/inc_hash_argon2.cl   |  6 +++---
 OpenCL/inc_types.h          |  6 +++++-
 OpenCL/m34000-pure.cl       |  2 +-
 src/autotune.c              | 34 ++++++++++++++++++++++++++++++++++
 src/modules/argon2_common.c | 37 -------------------------------------
 src/modules/module_15500.c  |  9 ++++++++-
 src/modules/module_20510.c  |  9 ++++++++-
 src/modules/module_34000.c  | 28 ++++++++++++++++++++++++++--
 8 files changed, 85 insertions(+), 46 deletions(-)

diff --git a/OpenCL/inc_hash_argon2.cl b/OpenCL/inc_hash_argon2.cl
index c87179d2c..f9aba1cef 100644
--- a/OpenCL/inc_hash_argon2.cl
+++ b/OpenCL/inc_hash_argon2.cl
@@ -43,7 +43,7 @@ DECLSPEC void argon2_initial_block (PRIVATE_AS const u32 *in, const u32 lane, co
     for (u32 idx = 0; idx < 8; idx++) blake_buf[idx] = ctx.h[idx];
 
     blake2b_init (&ctx);
-    blake2b_transform (ctx.h, blake_buf, 64, BLAKE2B_FINAL);
+    blake2b_transform (ctx.h, blake_buf, 64, (u64) BLAKE2B_FINAL);
 
     out[off + 0] = ctx.h[0];
     out[off + 1] = ctx.h[1];
@@ -279,7 +279,7 @@ DECLSPEC u32 index_u32x4 (const u32 array[4], u32 index)
       return array[3];
   }
 
-  return -1;
+  return (u32) -1;
 }
 
 DECLSPEC GLOBAL_AS argon2_block_t *argon2_get_current_block (GLOBAL_AS argon2_block_t *blocks, PRIVATE_AS const argon2_options_t *options, u32 lane, u32 index_in_lane, u64 R[4], u32 argon2_thread)
@@ -386,7 +386,7 @@ DECLSPEC void argon2_final (GLOBAL_AS argon2_block_t *blocks, PRIVATE_AS const a
   blake2b_init (&ctx);
 
   // Override default (0x40) value in BLAKE2b
-  ctx.h[0] ^= 0x40 ^ options->digest_len; 
+  ctx.h[0] ^= 0x40 ^ options->digest_len;
 
   blake2b_update (&ctx, output_len, 4);
   blake2b_update (&ctx, (PRIVATE_AS u32 *) final_block.values, sizeof(final_block));
diff --git a/OpenCL/inc_types.h b/OpenCL/inc_types.h
index a13c89b8e..e5a0516e1 100644
--- a/OpenCL/inc_types.h
+++ b/OpenCL/inc_types.h
@@ -11,11 +11,13 @@
 #define BITMAP_SHIFT1       kernel_param->bitmap_shift1
 #define BITMAP_SHIFT2       kernel_param->bitmap_shift2
 #define SALT_POS_HOST       (kernel_param->pws_pos + gid)
+#define SALT_POS_HOST_BID   (kernel_param->pws_pos + bid)
 #define LOOP_POS            kernel_param->loop_pos
 #define LOOP_CNT            kernel_param->loop_cnt
 #define IL_CNT              kernel_param->il_cnt
 #define DIGESTS_CNT         1
-#define DIGESTS_OFFSET_HOST (kernel_param->pws_pos + gid)
+#define DIGESTS_OFFSET_HOST     (kernel_param->pws_pos + gid)
+#define DIGESTS_OFFSET_HOST_BID (kernel_param->pws_pos + bid)
 #define COMBS_MODE          kernel_param->combs_mode
 #define SALT_REPEAT         kernel_param->salt_repeat
 #define PWS_POS             kernel_param->pws_pos
@@ -25,11 +27,13 @@
 #define BITMAP_SHIFT1       kernel_param->bitmap_shift1
 #define BITMAP_SHIFT2       kernel_param->bitmap_shift2
 #define SALT_POS_HOST       kernel_param->salt_pos_host
+#define SALT_POS_HOST_BID   SALT_POS_HOST
 #define LOOP_POS            kernel_param->loop_pos
 #define LOOP_CNT            kernel_param->loop_cnt
 #define IL_CNT              kernel_param->il_cnt
 #define DIGESTS_CNT         kernel_param->digests_cnt
 #define DIGESTS_OFFSET_HOST kernel_param->digests_offset_host
+#define DIGESTS_OFFSET_HOST_BID DIGESTS_OFFSET_HOST
 #define COMBS_MODE          kernel_param->combs_mode
 #define SALT_REPEAT         kernel_param->salt_repeat
 #define PWS_POS             kernel_param->pws_pos
diff --git a/OpenCL/m34000-pure.cl b/OpenCL/m34000-pure.cl
index ba87d835c..688291972 100644
--- a/OpenCL/m34000-pure.cl
+++ b/OpenCL/m34000-pure.cl
@@ -86,7 +86,7 @@ KERNEL_FQ KERNEL_FA void m34000_loop (KERN_ATTR_TMPS_ESALT (argon2_tmp_t, argon2
 
   GLOBAL_AS argon2_extra_t *argon2_extra = V + bd4;
 
-  argon2_options_t options = esalt_bufs[DIGESTS_OFFSET_HOST];
+  argon2_options_t options = esalt_bufs[DIGESTS_OFFSET_HOST_BID];
 
   options.parallelism = ARGON2_PARALLELISM;
 
diff --git a/src/autotune.c b/src/autotune.c
index e8f704599..cf233a88f 100644
--- a/src/autotune.c
+++ b/src/autotune.c
@@ -403,6 +403,40 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
       }
     }
 
+    if (1)
+    {
+      // some algorithms start way too high with these theoretical presets (for instance, 8700),
+      // so much so that they can't be tuned anymore
+
+      while ((kernel_accel > kernel_accel_min) || (kernel_threads > kernel_threads_min) || (kernel_loops > kernel_loops_min))
+      {
+        double exec_msec = try_run_times (hashcat_ctx, device_param, kernel_accel, kernel_loops, kernel_threads, 2);
+
+        if (exec_msec < target_msec / 16) break;
+
+        if (kernel_accel > kernel_accel_min)
+        {
+          kernel_accel = MAX (kernel_accel / 2, kernel_accel_min);
+
+          continue;
+        }
+
+        if (kernel_threads > kernel_threads_min)
+        {
+          kernel_threads = MAX (kernel_threads / 2, kernel_threads_min);
+
+          continue;
+        }
+
+        if (kernel_loops > kernel_loops_min)
+        {
+          kernel_loops = MAX (kernel_loops / 2, kernel_loops_min);
+
+          continue;
+        }
+      }
+    }
+
     for (u32 kernel_loops_test = kernel_loops; kernel_loops_test <= kernel_loops_max; kernel_loops_test <<= 1)
     {
       double exec_msec = try_run_times (hashcat_ctx, device_param, kernel_accel, kernel_loops_test, kernel_threads, 2);
diff --git a/src/modules/argon2_common.c b/src/modules/argon2_common.c
index cfe47071e..e6ebf9e1a 100644
--- a/src/modules/argon2_common.c
+++ b/src/modules/argon2_common.c
@@ -15,43 +15,6 @@
 #define ARGON2_SYNC_POINTS  4
 #define ARGON2_BLOCK_SIZE   1024
 
-typedef struct argon2_tmp
-{
-  u32 state[4]; // just something for now
-
-} argon2_tmp_t;
-
-typedef struct argon2_options
-{
-  u32 type;
-  u32 version;
-
-  u32 iterations;
-  u32 parallelism;
-  u32 memory_usage_in_kib;
-
-  u32 segment_length;
-  u32 lane_length;
-  u32 memory_block_count;
-
-  u32 digest_len;
-
-} argon2_options_t;
-
-u32 argon2_module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_min = 32; // hard-coded in kernel
-
-  return kernel_threads_min;
-}
-
-u32 argon2_module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 32; // hard-coded in kernel
-
-  return kernel_threads_max;
-}
-
 u64 argon2_module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u64 tmp_size = 0; // we'll add some later
diff --git a/src/modules/module_15500.c b/src/modules/module_15500.c
index cd8f4639b..720b6a785 100644
--- a/src/modules/module_15500.c
+++ b/src/modules/module_15500.c
@@ -67,6 +67,13 @@ u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED
   return esalt_size;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  u32 kernel_threads_max = 256;
+
+  return kernel_threads_max;
+}
+
 u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const bool optimized_kernel = (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL);
@@ -328,7 +335,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_20510.c b/src/modules/module_20510.c
index 394a69a3b..e92d96d6c 100644
--- a/src/modules/module_20510.c
+++ b/src/modules/module_20510.c
@@ -115,6 +115,13 @@ u32 module_hashes_count_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_
   return tmp_size;
 }
 
+u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  u32 kernel_threads_max = 256;
+
+  return kernel_threads_max;
+}
+
 u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u64 tmp_size = (const u64) sizeof (pkzip_extra_t);
@@ -249,7 +256,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_34000.c b/src/modules/module_34000.c
index 34b621133..39d73df80 100644
--- a/src/modules/module_34000.c
+++ b/src/modules/module_34000.c
@@ -24,6 +24,7 @@ static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
                                   | OPTI_TYPE_SLOW_HASH_DIMY_LOOP;
 static const u64   OPTS_TYPE      = OPTS_TYPE_STOCK_MODULE
                                   | OPTS_TYPE_PT_GENERATE_LE
+                                  | OPTS_TYPE_NATIVE_THREADS
                                   | OPTS_TYPE_THREAD_MULTI_DISABLE
                                   | OPTS_TYPE_MP_MULTI_DISABLE;
 static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
@@ -45,6 +46,29 @@ u32         module_salt_type      (MAYBE_UNUSED const hashconfig_t *hashconfig,
 const char *module_st_hash        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH;         }
 const char *module_st_pass        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS;         }
 
+typedef struct argon2_tmp
+{
+  u32 state[4]; // just something
+
+} argon2_tmp_t;
+
+typedef struct argon2_options
+{
+  u32 type;
+  u32 version;
+
+  u32 iterations;
+  u32 parallelism;
+  u32 memory_usage_in_kib;
+
+  u32 segment_length;
+  u32 lane_length;
+  u32 memory_block_count;
+
+  u32 digest_len;
+
+} argon2_options_t;
+
 #include "argon2_common.c"
 
 static const char *SIGNATURE_ARGON2D  = "$argon2d$";
@@ -269,8 +293,8 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = argon2_module_kernel_threads_max;
-  module_ctx->module_kernel_threads_min       = argon2_module_kernel_threads_min;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
   module_ctx->module_opti_type                = module_opti_type;

From 0dc26a18e44330b692f3e1b6ae27cb9610448167 Mon Sep 17 00:00:00 2001
From: oblivionsage <cookieandcream560@gmail.com>
Date: Tue, 8 Jul 2025 15:04:05 +0200
Subject: [PATCH 78/83] Address review feedback: Remove redundant ternary
 checks

- Remove redundant 'rc_version ? release_buf : "N/A"' checks
- Use release_buf directly since it's already initialized with "N/A"
- Addresses feedback from @ventaquil in PR review
---
 src/terminal.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/terminal.c b/src/terminal.c
index dc1649eb5..c6599a332 100644
--- a/src/terminal.c
+++ b/src/terminal.c
@@ -1297,7 +1297,7 @@ void backend_info (hashcat_ctx_t *hashcat_ctx)
     if (user_options->machine_readable == false)
     {
       event_log_info (hashcat_ctx, "OS.Name......: Windows");
-      event_log_info (hashcat_ctx, "OS.Release...: %s", rc_version ? release_buf : "N/A");
+      event_log_info (hashcat_ctx, "OS.Release...: %s", release_buf);
       event_log_info (hashcat_ctx, "HW.Platform..: %s", platform_buf);
       event_log_info (hashcat_ctx, "HW.Model.....: N/A");
     }
@@ -1305,7 +1305,7 @@ void backend_info (hashcat_ctx_t *hashcat_ctx)
     {
       printf ("\"OS\": { ");
       printf ("\"Name\": \"%s\", ", "Windows");
-      printf ("\"Release\": \"%s\" }, ", rc_version ? release_buf : "N/A");
+      printf ("\"Release\": \"%s\" }, ", release_buf);
       printf ("\"Hardware\": { ");
       printf ("\"Platform\": \"%s\", ", platform_buf);
       printf ("\"Model\": \"%s\" } ", "N/A");

From 853b14956177ef29b3c1f1cef084f216cf4bdfc8 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Tue, 8 Jul 2025 20:46:16 +0200
Subject: [PATCH 79/83] Argon2: add early support for multihash mixed mode
 cracking

This commit introduces initial support for mixed mode multihash cracking
in Argon2. Although I was skeptical at first, the final solution turned
out better than expected with only a minimal speed loss (1711H/s ->
1702H/s).

Unit tests have been updated to generate random combinations of
Argon2-I/D/ID with randomized m, t, and p values. So far, results look
solid.

Note: This is a complex change and may have undiscovered edge cases.

Some optimization opportunities remain. JIT-based optimizations are not
fully removed. We could also detect single-hash scenarios at runtime
and disable self-tests to re-enable JIT. Currently, the kernel workload
is sized based on the largest hash to avoid out-of-bound memory access.
---
 OpenCL/inc_hash_argon2.cl    | 11 ++++++
 OpenCL/inc_hash_argon2.h     |  1 +
 OpenCL/m34000-pure.cl        | 69 +++++++++++++++++-------------------
 src/modules/argon2_common.c  | 33 +++++++++++------
 tools/test_modules/m34000.pm |  8 ++---
 5 files changed, 72 insertions(+), 50 deletions(-)

diff --git a/OpenCL/inc_hash_argon2.cl b/OpenCL/inc_hash_argon2.cl
index f9aba1cef..f00409c9b 100644
--- a/OpenCL/inc_hash_argon2.cl
+++ b/OpenCL/inc_hash_argon2.cl
@@ -399,3 +399,14 @@ DECLSPEC void argon2_final (GLOBAL_AS argon2_block_t *blocks, PRIVATE_AS const a
     out [i + 1] = h32_from_64_S (ctx.h[idx]);
   }
 }
+
+DECLSPEC GLOBAL_AS argon2_block_t *get_argon2_block (const argon2_options_t *options, GLOBAL_AS void *buf, const int idx)
+{
+  GLOBAL_AS u32 *buf32 = (GLOBAL_AS u32 *) buf;
+
+  #ifdef ARGON2_TMP_ELEM
+  return (GLOBAL_AS argon2_block_t *) buf32 + (ARGON2_TMP_ELEM * idx);
+  #else
+  return (GLOBAL_AS argon2_block_t *) buf32 + (options->memory_block_count * idx);
+  #endif
+}
diff --git a/OpenCL/inc_hash_argon2.h b/OpenCL/inc_hash_argon2.h
index a03b7c480..e6efa760f 100644
--- a/OpenCL/inc_hash_argon2.h
+++ b/OpenCL/inc_hash_argon2.h
@@ -153,5 +153,6 @@ typedef struct argon2_pos
 DECLSPEC void argon2_init (GLOBAL_AS const pw_t *pw, GLOBAL_AS const salt_t *salt, PRIVATE_AS const argon2_options_t *options, GLOBAL_AS argon2_block_t *out);
 DECLSPEC void argon2_fill_segment (GLOBAL_AS argon2_block_t *blocks, PRIVATE_AS const argon2_options_t *options, PRIVATE_AS const argon2_pos_t *pos, LOCAL_AS u64 *shuffle_buf, const u32 argon2_thread, const u32 argon2_lsz);
 DECLSPEC void argon2_final (GLOBAL_AS argon2_block_t *blocks, PRIVATE_AS const argon2_options_t *options, PRIVATE_AS u32 *out);
+DECLSPEC GLOBAL_AS argon2_block_t *get_argon2_block (const argon2_options_t *options, GLOBAL_AS void *buf, const int idx);
 
 #endif // INC_HASH_ARGON2_H
diff --git a/OpenCL/m34000-pure.cl b/OpenCL/m34000-pure.cl
index 688291972..eee6d03e5 100644
--- a/OpenCL/m34000-pure.cl
+++ b/OpenCL/m34000-pure.cl
@@ -16,20 +16,10 @@
 
 typedef struct argon2_tmp
 {
-  u32 state[4]; // just something for now
+  u32 state[4];
 
 } argon2_tmp_t;
 
-typedef struct argon2_extra
-{
-#ifndef ARGON2_TMP_ELEM
-#define ARGON2_TMP_ELEM 1
-#endif
-
-  argon2_block_t blocks[ARGON2_TMP_ELEM];
-
-} argon2_extra_t;
-
 KERNEL_FQ KERNEL_FA void m34000_init (KERN_ATTR_TMPS_ESALT (argon2_tmp_t, argon2_options_t))
 {
   const u64 gid = get_global_id (0);
@@ -39,21 +29,21 @@ KERNEL_FQ KERNEL_FA void m34000_init (KERN_ATTR_TMPS_ESALT (argon2_tmp_t, argon2
   const u32 gd4 = gid / 4;
   const u32 gm4 = gid % 4;
 
-  GLOBAL_AS argon2_extra_t *V;
+  GLOBAL_AS void *V;
 
   switch (gm4)
   {
-    case 0: V = (GLOBAL_AS argon2_extra_t *) d_extra0_buf; break;
-    case 1: V = (GLOBAL_AS argon2_extra_t *) d_extra1_buf; break;
-    case 2: V = (GLOBAL_AS argon2_extra_t *) d_extra2_buf; break;
-    case 3: V = (GLOBAL_AS argon2_extra_t *) d_extra3_buf; break;
+    case 0: V = d_extra0_buf; break;
+    case 1: V = d_extra1_buf; break;
+    case 2: V = d_extra2_buf; break;
+    case 3: V = d_extra3_buf; break;
   }
 
-  GLOBAL_AS argon2_extra_t *argon2_extra = V + gd4;
-
   const argon2_options_t options = esalt_bufs[DIGESTS_OFFSET_HOST];
 
-  argon2_init (&pws[gid], &salt_bufs[SALT_POS_HOST], &options, argon2_extra->blocks);
+  GLOBAL_AS argon2_block_t *argon2_block = get_argon2_block (&options, V, gd4);
+
+  argon2_init (&pws[gid], &salt_bufs[SALT_POS_HOST], &options, argon2_block);
 }
 
 KERNEL_FQ KERNEL_FA void m34000_loop (KERN_ATTR_TMPS_ESALT (argon2_tmp_t, argon2_options_t))
@@ -68,27 +58,34 @@ KERNEL_FQ KERNEL_FA void m34000_loop (KERN_ATTR_TMPS_ESALT (argon2_tmp_t, argon2
   const u32 argon2_thread = get_local_id (0);
   const u32 argon2_lsz = get_local_size (0);
 
+  #ifdef ARGON2_PARALLELISM
   LOCAL_VK u64 shuffle_bufs[ARGON2_PARALLELISM][32];
+  #else
+  LOCAL_VK u64 shuffle_bufs[32][32];
+  #endif
+
   LOCAL_AS u64 *shuffle_buf = shuffle_bufs[lid];
 
   const u32 bd4 = bid / 4;
   const u32 bm4 = bid % 4;
 
-  GLOBAL_AS argon2_extra_t *V;
+  GLOBAL_AS void *V;
 
   switch (bm4)
   {
-    case 0: V = (GLOBAL_AS argon2_extra_t *) d_extra0_buf; break;
-    case 1: V = (GLOBAL_AS argon2_extra_t *) d_extra1_buf; break;
-    case 2: V = (GLOBAL_AS argon2_extra_t *) d_extra2_buf; break;
-    case 3: V = (GLOBAL_AS argon2_extra_t *) d_extra3_buf; break;
+    case 0: V = d_extra0_buf; break;
+    case 1: V = d_extra1_buf; break;
+    case 2: V = d_extra2_buf; break;
+    case 3: V = d_extra3_buf; break;
   }
 
-  GLOBAL_AS argon2_extra_t *argon2_extra = V + bd4;
-
   argon2_options_t options = esalt_bufs[DIGESTS_OFFSET_HOST_BID];
 
+  #ifdef ARGON2_PARALLELISM
   options.parallelism = ARGON2_PARALLELISM;
+  #endif
+
+  GLOBAL_AS argon2_block_t *argon2_block = get_argon2_block (&options, V, bd4);
 
   argon2_pos_t pos;
 
@@ -99,7 +96,7 @@ KERNEL_FQ KERNEL_FA void m34000_loop (KERN_ATTR_TMPS_ESALT (argon2_tmp_t, argon2
   {
     for (pos.lane = lid; pos.lane < options.parallelism; pos.lane += lsz)
     {
-      argon2_fill_segment (argon2_extra->blocks, &options, &pos, shuffle_buf, argon2_thread, argon2_lsz);
+      argon2_fill_segment (argon2_block, &options, &pos, shuffle_buf, argon2_thread, argon2_lsz);
     }
 
     SYNC_THREADS ();
@@ -123,23 +120,23 @@ KERNEL_FQ KERNEL_FA void m34000_comp (KERN_ATTR_TMPS_ESALT (argon2_tmp_t, argon2
   const u32 gd4 = gid / 4;
   const u32 gm4 = gid % 4;
 
-  GLOBAL_AS argon2_extra_t *V;
+  GLOBAL_AS void *V;
 
   switch (gm4)
   {
-    case 0: V = (GLOBAL_AS argon2_extra_t *) d_extra0_buf; break;
-    case 1: V = (GLOBAL_AS argon2_extra_t *) d_extra1_buf; break;
-    case 2: V = (GLOBAL_AS argon2_extra_t *) d_extra2_buf; break;
-    case 3: V = (GLOBAL_AS argon2_extra_t *) d_extra3_buf; break;
+    case 0: V = d_extra0_buf; break;
+    case 1: V = d_extra1_buf; break;
+    case 2: V = d_extra2_buf; break;
+    case 3: V = d_extra3_buf; break;
   }
 
-  GLOBAL_AS argon2_extra_t *argon2_extra = V + gd4;
+  argon2_options_t options = esalt_bufs[DIGESTS_OFFSET_HOST];
+
+  GLOBAL_AS argon2_block_t *argon2_block = get_argon2_block (&options, V, gd4);
 
   u32 out[8];
 
-  const argon2_options_t options = esalt_bufs[DIGESTS_OFFSET_HOST];
-
-  argon2_final (argon2_extra->blocks, &options, out);
+  argon2_final (argon2_block, &options, out);
 
   const u32 r0 = out[0];
   const u32 r1 = out[1];
diff --git a/src/modules/argon2_common.c b/src/modules/argon2_common.c
index e6ebf9e1a..52961a2e3 100644
--- a/src/modules/argon2_common.c
+++ b/src/modules/argon2_common.c
@@ -22,14 +22,28 @@ u64 argon2_module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_U
   return tmp_size;
 }
 
+u64 get_largest_memory_block_count (const hashes_t *hashes)
+{
+  argon2_options_t *options    = (argon2_options_t *) hashes->esalts_buf;
+  argon2_options_t *options_st = (argon2_options_t *) hashes->st_esalts_buf;
+
+  u64 largest_memory_block_count = (options_st == NULL) ? options->memory_block_count : options_st->memory_block_count;
+
+  for (u32 i = 0; i < hashes->salts_cnt; i++)
+  {
+    largest_memory_block_count = MAX (largest_memory_block_count, options->memory_block_count);
+
+    options++;
+  }
+
+  return largest_memory_block_count;
+}
+
 const char *argon2_module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel_user)
 {
   hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
 
-  argon2_options_t *options    = (argon2_options_t *) hashes->esalts_buf;
-  argon2_options_t *options_st = (argon2_options_t *) hashes->st_esalts_buf;
-
-  const u32 memory_block_count = (options->memory_block_count) ? options->memory_block_count : options_st->memory_block_count;
+  const u32 memory_block_count = get_largest_memory_block_count (hashes);
 
   const u64 size_per_accel = ARGON2_BLOCK_SIZE * memory_block_count;
 
@@ -79,10 +93,7 @@ const char *argon2_module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t
 
 u64 argon2_module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
-  argon2_options_t *options    = (argon2_options_t *) hashes->esalts_buf;
-  argon2_options_t *options_st = (argon2_options_t *) hashes->st_esalts_buf;
-
-  const u32 memory_block_count = (options->memory_block_count) ? options->memory_block_count : options_st->memory_block_count;
+  const u32 memory_block_count = get_largest_memory_block_count (hashes);
 
   const u64 size_per_accel = ARGON2_BLOCK_SIZE * memory_block_count;
 
@@ -93,6 +104,7 @@ u64 argon2_module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig
 
 u64 argon2_module_extra_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes)
 {
+  /*
   argon2_options_t *options    = (argon2_options_t *) hashes->esalts_buf;
   argon2_options_t *options_st = (argon2_options_t *) hashes->st_esalts_buf;
 
@@ -118,6 +130,7 @@ u64 argon2_module_extra_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, M
       return (1ULL << 62);
     }
   }
+  */
 
   u64 tmp_size = sizeof (argon2_tmp_t);
 
@@ -126,11 +139,11 @@ u64 argon2_module_extra_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, M
 
 char *argon2_module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
 {
-  argon2_options_t *options = (argon2_options_t *) hashes->esalts_buf;
+  //argon2_options_t *options = (argon2_options_t *) hashes->esalts_buf;
 
   char *jit_build_options = NULL;
 
-  hc_asprintf (&jit_build_options, "-D ARGON2_PARALLELISM=%u -D ARGON2_TMP_ELEM=%u", options[0].parallelism, options[0].memory_block_count);
+  //hc_asprintf (&jit_build_options, "-D ARGON2_PARALLELISM=%u -D ARGON2_TMP_ELEM=%u", options[0].parallelism, options[0].memory_block_count);
 
   return jit_build_options;
 }
diff --git a/tools/test_modules/m34000.pm b/tools/test_modules/m34000.pm
index 7b2e671ff..623c6c7f3 100644
--- a/tools/test_modules/m34000.pm
+++ b/tools/test_modules/m34000.pm
@@ -18,9 +18,9 @@ sub module_generate_hash
   my $word  = shift;
   my $salt  = shift;
   my $sign  = shift // ("argon2d","argon2i","argon2id")[random_number (0, 2)];
-  my $m     = shift // 65536;
-  my $t     = shift // 3;
-  my $p     = shift // 1;
+  my $m     = shift // (1 << random_number (12, 18));
+  my $t     = shift // random_number (1, 8);
+  my $p     = shift // random_number (1, 8);
   my $len   = shift // random_number (1, 2) * 16;
 
   my $salt_bin = pack ("H*", $salt);
@@ -48,7 +48,7 @@ sub module_verify_hash
 
   return unless ((substr ($hash, 0,  9) eq '$argon2d$')
               || (substr ($hash, 0,  9) eq '$argon2i$')
-              || (substr ($hash, 0, 10) eq '$argon2id$'));              
+              || (substr ($hash, 0, 10) eq '$argon2id$'));
 
   my (undef, $signature, $version, $config, $salt, $digest) = split '\$', $hash;
 

From 5210ccd50db4049370f470df58593ae8ce63e079 Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Tue, 8 Jul 2025 22:44:23 +0200
Subject: [PATCH 80/83] got more stable cracking with argon2 on Apple Metal

---
 OpenCL/inc_hash_argon2.cl | 21 ++++++++-------------
 OpenCL/inc_hash_argon2.h  |  6 +++++-
 OpenCL/m34000-pure.cl     |  2 ++
 3 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/OpenCL/inc_hash_argon2.cl b/OpenCL/inc_hash_argon2.cl
index f00409c9b..190d3b2d2 100644
--- a/OpenCL/inc_hash_argon2.cl
+++ b/OpenCL/inc_hash_argon2.cl
@@ -219,14 +219,9 @@ DECLSPEC void argon2_hash_block (u64 R[4], int argon2_thread, LOCAL_AS u64 *shuf
 
 DECLSPEC void argon2_next_addresses (PRIVATE_AS const argon2_options_t *options, PRIVATE_AS const argon2_pos_t *pos, PRIVATE_AS u32 *addresses, u32 start_index, u32 argon2_thread, LOCAL_AS u64 *shuffle_buf, u32 argon2_lsz)
 {
-  u64 Z[4];
+  u64 Z[4] = { 0 };
 
-  Z[0] = 0;
-  Z[1] = 0;
-  Z[2] = 0;
-  Z[3] = 0;
-
-  u64 tmp[4];
+  u64 tmp[4] = { 0 };
 
   tmp[0] = 0;
   tmp[1] = 0;
@@ -299,7 +294,7 @@ DECLSPEC void argon2_fill_subsegment (GLOBAL_AS argon2_block_t *blocks, PRIVATE_
 {
   for (u32 index = start_index; index < end_index; index++, cur_block += options->parallelism)
   {
-    u32 ref_address;
+    u32 ref_address = 0;
 
     if (indep_addr)
     {
@@ -340,7 +335,7 @@ DECLSPEC void argon2_fill_segment (GLOBAL_AS argon2_block_t *blocks, PRIVATE_AS
   const u32 skip_blocks   = (pos->pass == 0) && (pos->slice == 0) ? 2 : 0;
   const u32 index_in_lane = (pos->slice * options->segment_length) + skip_blocks;
 
-  u64 R[4];
+  u64 R[4] = { 0 };
 
   GLOBAL_AS argon2_block_t *cur_block = argon2_get_current_block (blocks, options, pos->lane, index_in_lane, R, argon2_thread);
 
@@ -351,7 +346,7 @@ DECLSPEC void argon2_fill_segment (GLOBAL_AS argon2_block_t *blocks, PRIVATE_AS
       const u32 start_index = (block_index == 0) ? skip_blocks : block_index;
       const u32 end_index   = MIN(((start_index | 127) + 1), options->segment_length);
 
-      u32 addresses[4] = { 0, 0, 0, 0 };
+      u32 addresses[4] = { 0 };
 
       argon2_next_addresses (options, pos, addresses, block_index, argon2_thread, shuffle_buf, argon2_lsz);
       argon2_fill_subsegment (blocks, options, pos, true, addresses, start_index, end_index, cur_block, R, argon2_thread, shuffle_buf, argon2_lsz);
@@ -379,8 +374,8 @@ DECLSPEC void argon2_final (GLOBAL_AS argon2_block_t *blocks, PRIVATE_AS const a
     for (u32 idx = 0; idx < 128; idx++) final_block.values[idx] ^= blocks[((lane_length - 1) * lanes) + l].values[idx];
   }
 
-  u32 output_len [32] = {0};
-  output_len [0] = options->digest_len;
+  u32 output_len[32] = { 0 };
+  output_len[0] = options->digest_len;
 
   blake2b_ctx_t ctx;
   blake2b_init (&ctx);
@@ -400,7 +395,7 @@ DECLSPEC void argon2_final (GLOBAL_AS argon2_block_t *blocks, PRIVATE_AS const a
   }
 }
 
-DECLSPEC GLOBAL_AS argon2_block_t *get_argon2_block (const argon2_options_t *options, GLOBAL_AS void *buf, const int idx)
+DECLSPEC GLOBAL_AS argon2_block_t *get_argon2_block (PRIVATE_AS const argon2_options_t *options, GLOBAL_AS void *buf, const int idx)
 {
   GLOBAL_AS u32 *buf32 = (GLOBAL_AS u32 *) buf;
 
diff --git a/OpenCL/inc_hash_argon2.h b/OpenCL/inc_hash_argon2.h
index e6efa760f..15be552ee 100644
--- a/OpenCL/inc_hash_argon2.h
+++ b/OpenCL/inc_hash_argon2.h
@@ -76,6 +76,8 @@ DECLSPEC u64 hc__shfl (MAYBE_UNUSED LOCAL_AS u64 *shuffle_buf, const u64 var, co
 
   const u64 out = shuffle_buf[src_lane & (argon2_lsz - 1)];
 
+  barrier (CLK_LOCAL_MEM_FENCE);
+
   return out;
 }
 #endif
@@ -91,6 +93,8 @@ DECLSPEC u64 hc__shfl (LOCAL_AS u64 *shuffle_buf, const u64 var, const int src_l
 
   const u64 out = shuffle_buf[src_lane & (argon2_lsz - 1)];
 
+  SYNC_THREADS();
+
   return out;
 }
 #endif
@@ -153,6 +157,6 @@ typedef struct argon2_pos
 DECLSPEC void argon2_init (GLOBAL_AS const pw_t *pw, GLOBAL_AS const salt_t *salt, PRIVATE_AS const argon2_options_t *options, GLOBAL_AS argon2_block_t *out);
 DECLSPEC void argon2_fill_segment (GLOBAL_AS argon2_block_t *blocks, PRIVATE_AS const argon2_options_t *options, PRIVATE_AS const argon2_pos_t *pos, LOCAL_AS u64 *shuffle_buf, const u32 argon2_thread, const u32 argon2_lsz);
 DECLSPEC void argon2_final (GLOBAL_AS argon2_block_t *blocks, PRIVATE_AS const argon2_options_t *options, PRIVATE_AS u32 *out);
-DECLSPEC GLOBAL_AS argon2_block_t *get_argon2_block (const argon2_options_t *options, GLOBAL_AS void *buf, const int idx);
+DECLSPEC GLOBAL_AS argon2_block_t *get_argon2_block (PRIVATE_AS const argon2_options_t *options, GLOBAL_AS void *buf, const int idx);
 
 #endif // INC_HASH_ARGON2_H
diff --git a/OpenCL/m34000-pure.cl b/OpenCL/m34000-pure.cl
index eee6d03e5..391bb110f 100644
--- a/OpenCL/m34000-pure.cl
+++ b/OpenCL/m34000-pure.cl
@@ -66,6 +66,8 @@ KERNEL_FQ KERNEL_FA void m34000_loop (KERN_ATTR_TMPS_ESALT (argon2_tmp_t, argon2
 
   LOCAL_AS u64 *shuffle_buf = shuffle_bufs[lid];
 
+  SYNC_THREADS();
+
   const u32 bd4 = bid / 4;
   const u32 bm4 = bid % 4;
 

From adbf9d175b79e58276eead5922cc9bf73c6636a0 Mon Sep 17 00:00:00 2001
From: Gabriele Gristina <matrix@users.noreply.github.com>
Date: Wed, 9 Jul 2025 08:16:00 +0200
Subject: [PATCH 81/83] Use simd_shuffle on Apple Metal for Argon2

---
 OpenCL/inc_hash_argon2.h | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/OpenCL/inc_hash_argon2.h b/OpenCL/inc_hash_argon2.h
index 15be552ee..a623f492f 100644
--- a/OpenCL/inc_hash_argon2.h
+++ b/OpenCL/inc_hash_argon2.h
@@ -83,17 +83,19 @@ DECLSPEC u64 hc__shfl (MAYBE_UNUSED LOCAL_AS u64 *shuffle_buf, const u64 var, co
 #endif
 
 #elif defined IS_METAL
-#define hc__shfl_sync(shfbuf,mask,var,srcLane,argon2_thread,argon2_lsz) hc__shfl ((shfbuf),(var),(srcLane),(argon2_thread),(argon2_lsz))
+#define hc__shfl_sync(shfbuf,mask,var,srcLane,argon2_thread,argon2_lsz) simd_shuffle_64 ((var),(srcLane),(argon2_lsz))
 
-DECLSPEC u64 hc__shfl (LOCAL_AS u64 *shuffle_buf, const u64 var, const int src_lane, const u32 argon2_thread, const u32 argon2_lsz)
+DECLSPEC u64 simd_shuffle_64 (const u64 var, const int src_lane, const u32 argon2_lsz)
 {
-  shuffle_buf[argon2_thread] = var;
+  const u32 idx = src_lane & (argon2_lsz - 1);
 
-  SYNC_THREADS();
+  const u32 l32 = l32_from_64_S (var);
+  const u32 h32 = h32_from_64_S (var);
 
-  const u64 out = shuffle_buf[src_lane & (argon2_lsz - 1)];
+  u32 l32r = simd_shuffle (l32, idx);
+  u32 h32r = simd_shuffle (h32, idx);
 
-  SYNC_THREADS();
+  const u64 out = hl32_to_64_S (h32r, l32r);
 
   return out;
 }

From 5f41bfa3f4c9bda59473b1c398f74b8005cb2a9f Mon Sep 17 00:00:00 2001
From: luke <92046606+dunghm19@users.noreply.github.com>
Date: Wed, 9 Jul 2025 09:14:09 +0200
Subject: [PATCH 82/83] Update m33100 kernels and module to latest hashcat base

---
 OpenCL/m33100_a0-pure.cl   | 6 +++---
 OpenCL/m33100_a1-pure.cl   | 4 ++--
 OpenCL/m33100_a3-pure.cl   | 4 ++--
 src/modules/module_33100.c | 2 ++
 4 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/OpenCL/m33100_a0-pure.cl b/OpenCL/m33100_a0-pure.cl
index a8f944ef8..b2dd16ad7 100644
--- a/OpenCL/m33100_a0-pure.cl
+++ b/OpenCL/m33100_a0-pure.cl
@@ -28,7 +28,7 @@
 #define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
-KERNEL_FQ void m33100_mxx (KERN_ATTR_RULES ())
+KERNEL_FQ KERNEL_FA void m33100_mxx (KERN_ATTR_RULES ())
 {
   /**
    * modifier
@@ -130,7 +130,7 @@ KERNEL_FQ void m33100_mxx (KERN_ATTR_RULES ())
     w3[1] = 0;
     w3[2] = 0;
     w3[3] = 0;
-  
+
     md5_update_64 (&ctx, w0, w1, w2, w3, 32);
 
     md5_update (&ctx, s, salt_len);
@@ -146,7 +146,7 @@ KERNEL_FQ void m33100_mxx (KERN_ATTR_RULES ())
   }
 }
 
-KERNEL_FQ void m33100_sxx (KERN_ATTR_RULES ())
+KERNEL_FQ KERNEL_FA void m33100_sxx (KERN_ATTR_RULES ())
 {
   /**
    * modifier
diff --git a/OpenCL/m33100_a1-pure.cl b/OpenCL/m33100_a1-pure.cl
index b56745f39..e2d62d8b6 100644
--- a/OpenCL/m33100_a1-pure.cl
+++ b/OpenCL/m33100_a1-pure.cl
@@ -26,7 +26,7 @@
 #define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
-KERNEL_FQ void m33100_mxx (KERN_ATTR_BASIC ())
+KERNEL_FQ KERNEL_FA void m33100_mxx (KERN_ATTR_BASIC ())
 {
   /**
    * modifier
@@ -142,7 +142,7 @@ KERNEL_FQ void m33100_mxx (KERN_ATTR_BASIC ())
   }
 }
 
-KERNEL_FQ void m33100_sxx (KERN_ATTR_BASIC ())
+KERNEL_FQ KERNEL_FA void m33100_sxx (KERN_ATTR_BASIC ())
 {
   /**
    * modifier
diff --git a/OpenCL/m33100_a3-pure.cl b/OpenCL/m33100_a3-pure.cl
index 9a5af70bb..3674d3c93 100644
--- a/OpenCL/m33100_a3-pure.cl
+++ b/OpenCL/m33100_a3-pure.cl
@@ -26,7 +26,7 @@
 #define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
 #endif
 
-KERNEL_FQ void m33100_mxx (KERN_ATTR_VECTOR ())
+KERNEL_FQ KERNEL_FA void m33100_mxx (KERN_ATTR_VECTOR ())
 {
   /**
    * modifier
@@ -155,7 +155,7 @@ KERNEL_FQ void m33100_mxx (KERN_ATTR_VECTOR ())
   }
 }
 
-KERNEL_FQ void m33100_sxx (KERN_ATTR_VECTOR ())
+KERNEL_FQ KERNEL_FA void m33100_sxx (KERN_ATTR_VECTOR ())
 {
   /**
    * modifier
diff --git a/src/modules/module_33100.c b/src/modules/module_33100.c
index 56e88084b..1ef07b77a 100644
--- a/src/modules/module_33100.c
+++ b/src/modules/module_33100.c
@@ -152,6 +152,8 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_benchmark_mask           = MODULE_DEFAULT;
   module_ctx->module_benchmark_charset        = MODULE_DEFAULT;
   module_ctx->module_benchmark_salt           = MODULE_DEFAULT;
+  module_ctx->module_bridge_name              = MODULE_DEFAULT;
+  module_ctx->module_bridge_type              = MODULE_DEFAULT;
   module_ctx->module_build_plain_postprocess  = MODULE_DEFAULT;
   module_ctx->module_deep_comp_kernel         = MODULE_DEFAULT;
   module_ctx->module_deprecated_notice        = MODULE_DEFAULT;

From 02a439ce02c722dcb798e2b2869a5d94606f21e5 Mon Sep 17 00:00:00 2001
From: Jens Steube <jens.steube@gmail.com>
Date: Wed, 9 Jul 2025 15:54:03 +0200
Subject: [PATCH 83/83] Fix compile error on MSYS2 console

---
 src/terminal.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/terminal.c b/src/terminal.c
index 0b6e00f41..c36cf4c20 100644
--- a/src/terminal.c
+++ b/src/terminal.c
@@ -17,6 +17,13 @@
 #include "timer.h"
 #include "terminal.h"
 
+#if defined (_POSIX)
+#include <sys/utsname.h>
+#if !defined (__APPLE__)
+#include <sys/sysinfo.h>
+#endif
+#endif
+
 static const size_t MAXIMUM_EXAMPLE_HASH_LENGTH = 200;
 
 static const size_t TERMINAL_LINE_LENGTH = 79;
@@ -1253,7 +1260,7 @@ void backend_info (hashcat_ctx_t *hashcat_ctx)
       printf ("\"SystemInfo\": { ");
     }
 
-    #if defined (_WIN) || defined (__CYGWIN__) || defined (__MSYS__)
+    #if defined (_WIN)
     // Get Windows system information
     SYSTEM_INFO sysinfo;
     OSVERSIONINFO osvi;
@@ -1311,6 +1318,7 @@ void backend_info (hashcat_ctx_t *hashcat_ctx)
       printf ("\"Model\": \"%s\" } ", "N/A");
       printf ("}, ");
     }
+
     #else
 
     struct utsname utsbuf;
@@ -1320,7 +1328,7 @@ void backend_info (hashcat_ctx_t *hashcat_ctx)
 
     char *hw_model_buf = NULL;
 
-    #if !defined (__linux__)
+    #if !defined (__linux__) && !defined (__CYGWIN__) && !defined (__MSYS__)
 
     size_t hw_model_len = 0;