diff --git a/OpenCL/inc_rp.cl b/OpenCL/inc_rp.cl
index 8cecc661d..8ad61b27f 100644
--- a/OpenCL/inc_rp.cl
+++ b/OpenCL/inc_rp.cl
@@ -269,6 +269,35 @@ DECLSPEC int mangle_toggle_at (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p
   return (len);
 }
 
+DECLSPEC int mangle_toggle_at_sep (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int len)
+{
+  if (len >= RP_PASSWORD_SIZE) return (len);
+
+  u8 occurence = 0;
+
+  u32 rem = 0;
+
+  for (int i = 0, idx = 0; i < len; i += 4, idx += 1)
+  {
+    const u32 t = buf[idx];
+
+    buf[idx] = t | generate_cmask (t);
+
+    u32 out = rem;
+
+    rem = 0;
+
+    if (((t >>  0) & 0xff) == p1) { if (occurence == p0) out = 0x0000ff00; occurence++; }
+    if (((t >>  8) & 0xff) == p1) { if (occurence == p0) out = 0x00ff0000; occurence++; }
+    if (((t >> 16) & 0xff) == p1) { if (occurence == p0) out = 0xff000000; occurence++; }
+    if (((t >> 24) & 0xff) == p1) { if (occurence == p0) rem = 0x000000ff; occurence++; }
+
+    buf[idx] = t ^ (generate_cmask (t) & out);
+  }
+
+  return (len);
+}
+
 DECLSPEC int mangle_reverse (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int len)
 {
   for (int l = 0; l < len / 2; l++)
@@ -725,6 +754,7 @@ DECLSPEC int apply_rule (const u32 name, MAYBE_UNUSED const u8 p0, MAYBE_UNUSED
     case RULE_OP_MANGLE_UREST_LFIRST:     out_len = mangle_urest_lfirst     (p0, p1,        buf, out_len); break;
     case RULE_OP_MANGLE_TREST:            out_len = mangle_trest            (p0, p1,        buf, out_len); break;
     case RULE_OP_MANGLE_TOGGLE_AT:        out_len = mangle_toggle_at        (p0, p1,        buf, out_len); break;
+    case RULE_OP_MANGLE_TOGGLE_AT_SEP:    out_len = mangle_toggle_at_sep    (p0, p1,        buf, out_len); break;
     case RULE_OP_MANGLE_REVERSE:          out_len = mangle_reverse          (p0, p1,        buf, out_len); break;
     case RULE_OP_MANGLE_DUPEWORD:         out_len = mangle_dupeword         (p0, p1,        buf, out_len); break;
     case RULE_OP_MANGLE_DUPEWORD_TIMES:   out_len = mangle_dupeword_times   (p0, p1, (u8 *) buf, out_len); break;
diff --git a/OpenCL/inc_rp.h b/OpenCL/inc_rp.h
index c13d06e1f..3b91ff9da 100644
--- a/OpenCL/inc_rp.h
+++ b/OpenCL/inc_rp.h
@@ -21,6 +21,7 @@
 #define RULE_OP_MANGLE_UREST_LFIRST     'C'
 #define RULE_OP_MANGLE_TREST            't'
 #define RULE_OP_MANGLE_TOGGLE_AT        'T'
+#define RULE_OP_MANGLE_TOGGLE_AT_SEP    '3'
 #define RULE_OP_MANGLE_REVERSE          'r'
 #define RULE_OP_MANGLE_DUPEWORD         'd'
 #define RULE_OP_MANGLE_DUPEWORD_TIMES   'p'
@@ -83,6 +84,7 @@ DECLSPEC int mangle_urest (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u
 DECLSPEC int mangle_urest_lfirst (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int len);
 DECLSPEC int mangle_trest (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int len);
 DECLSPEC int mangle_toggle_at (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int len);
+DECLSPEC int mangle_toggle_at_sep (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int len);
 DECLSPEC int mangle_reverse (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int len);
 DECLSPEC int mangle_dupeword (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u32 *buf, const int len);
 DECLSPEC int mangle_dupeword_times (MAYBE_UNUSED const u8 p0, MAYBE_UNUSED const u8 p1, u8 *buf, const int len);
diff --git a/OpenCL/inc_rp_optimized.cl b/OpenCL/inc_rp_optimized.cl
index 026198f09..38239c0ec 100644
--- a/OpenCL/inc_rp_optimized.cl
+++ b/OpenCL/inc_rp_optimized.cl
@@ -1163,6 +1163,82 @@ DECLSPEC u32 rule_op_mangle_toggle_at (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED c
   return (in_len);
 }
 
+DECLSPEC u32 rule_op_mangle_toggle_at_sep (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len)
+{
+  if (in_len == 0) return in_len;
+
+  u32 r0 = search_on_register (buf0[0], p1);
+  u32 r1 = search_on_register (buf0[1], p1);
+  u32 r2 = search_on_register (buf0[2], p1);
+  u32 r3 = search_on_register (buf0[3], p1);
+  u32 r4 = search_on_register (buf1[0], p1);
+  u32 r5 = search_on_register (buf1[1], p1);
+  u32 r6 = search_on_register (buf1[2], p1);
+  u32 r7 = search_on_register (buf1[3], p1);
+
+  const u32 rn = (r0 <<  0)
+               | (r1 <<  4)
+               | (r2 <<  8)
+               | (r3 << 12)
+               | (r4 << 16)
+               | (r5 << 20)
+               | (r6 << 24)
+               | (r7 << 28);
+
+  if (rn == 0) return in_len;
+
+  u32 occurence = 0;
+
+  u32 ro = 0;
+
+  #ifdef _unroll
+  #pragma unroll
+  #endif
+  for (int i = 0; i < 32; i++)
+  {
+    if ((rn >> i) & 1)
+    {
+      if (occurence == p0)
+      {
+        ro = 1 << i;
+
+        break;
+      }
+
+      occurence++;
+    }
+  }
+
+  r0 = (ro >>  0) & 15;
+  r1 = (ro >>  4) & 15;
+  r2 = (ro >>  8) & 15;
+  r3 = (ro >> 12) & 15;
+  r4 = (ro >> 16) & 15;
+  r5 = (ro >> 20) & 15;
+  r6 = (ro >> 24) & 15;
+  r7 = (ro >> 28) & 15;
+
+  r0 <<= 1;
+  r1 <<= 1; r1 |= r0 >> 4;
+  r2 <<= 1; r2 |= r1 >> 4;
+  r3 <<= 1; r3 |= r2 >> 4;
+  r4 <<= 1; r4 |= r3 >> 4;
+  r5 <<= 1; r5 |= r4 >> 4;
+  r6 <<= 1; r6 |= r5 >> 4;
+  r7 <<= 1; r7 |= r6 >> 4;
+
+  buf0[0] = toggle_on_register (buf0[0], r0);
+  buf0[1] = toggle_on_register (buf0[1], r1);
+  buf0[2] = toggle_on_register (buf0[2], r2);
+  buf0[3] = toggle_on_register (buf0[3], r3);
+  buf1[0] = toggle_on_register (buf1[0], r4);
+  buf1[1] = toggle_on_register (buf1[1], r5);
+  buf1[2] = toggle_on_register (buf1[2], r6);
+  buf1[3] = toggle_on_register (buf1[3], r7);
+
+  return in_len;
+}
+
 DECLSPEC u32 rule_op_mangle_reverse (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len)
 {
   reverse_block_optimized (buf0, buf1, buf0, buf1, in_len);
@@ -2285,6 +2361,7 @@ DECLSPEC u32 apply_rule_optimized (const u32 name, const u32 p0, const u32 p1, u
     case RULE_OP_MANGLE_UREST_LFIRST:     out_len = rule_op_mangle_urest_lfirst     (p0, p1, buf0, buf1, out_len); break;
     case RULE_OP_MANGLE_TREST:            out_len = rule_op_mangle_trest            (p0, p1, buf0, buf1, out_len); break;
     case RULE_OP_MANGLE_TOGGLE_AT:        out_len = rule_op_mangle_toggle_at        (p0, p1, buf0, buf1, out_len); break;
+    case RULE_OP_MANGLE_TOGGLE_AT_SEP:    out_len = rule_op_mangle_toggle_at_sep    (p0, p1, buf0, buf1, out_len); break;
     case RULE_OP_MANGLE_REVERSE:          out_len = rule_op_mangle_reverse          (p0, p1, buf0, buf1, out_len); break;
     case RULE_OP_MANGLE_DUPEWORD:         out_len = rule_op_mangle_dupeword         (p0, p1, buf0, buf1, out_len); break;
     case RULE_OP_MANGLE_DUPEWORD_TIMES:   out_len = rule_op_mangle_dupeword_times   (p0, p1, buf0, buf1, out_len); break;
diff --git a/OpenCL/inc_rp_optimized.h b/OpenCL/inc_rp_optimized.h
index b6a133086..d391d1cfe 100644
--- a/OpenCL/inc_rp_optimized.h
+++ b/OpenCL/inc_rp_optimized.h
@@ -21,6 +21,7 @@
 #define RULE_OP_MANGLE_UREST_LFIRST     'C'
 #define RULE_OP_MANGLE_TREST            't'
 #define RULE_OP_MANGLE_TOGGLE_AT        'T'
+#define RULE_OP_MANGLE_TOGGLE_AT_SEP    '3'
 #define RULE_OP_MANGLE_REVERSE          'r'
 #define RULE_OP_MANGLE_DUPEWORD         'd'
 #define RULE_OP_MANGLE_DUPEWORD_TIMES   'p'
@@ -85,6 +86,7 @@ DECLSPEC u32 rule_op_mangle_lrest_ufirst (MAYBE_UNUSED const u32 p0, MAYBE_UNUSE
 DECLSPEC u32 rule_op_mangle_urest_lfirst (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len);
 DECLSPEC u32 rule_op_mangle_trest (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len);
 DECLSPEC u32 rule_op_mangle_toggle_at (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len);
+DECLSPEC u32 rule_op_mangle_toggle_at_sep (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len);
 DECLSPEC u32 rule_op_mangle_reverse (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len);
 DECLSPEC u32 rule_op_mangle_dupeword (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len);
 DECLSPEC u32 rule_op_mangle_dupeword_times (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 *buf0, MAYBE_UNUSED u32 *buf1, const u32 in_len);
diff --git a/OpenCL/m26700-pure.cl b/OpenCL/m26700-pure.cl
new file mode 100644
index 000000000..8a5a04325
--- /dev/null
+++ b/OpenCL/m26700-pure.cl
@@ -0,0 +1,369 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+//#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include "inc_vendor.h"
+#include "inc_types.h"
+#include "inc_platform.cl"
+#include "inc_common.cl"
+#include "inc_simd.cl"
+#include "inc_hash_sha224.cl"
+#endif
+
+#define COMPARE_S "inc_comp_single.cl"
+#define COMPARE_M "inc_comp_multi.cl"
+
+#define SNMPV3_SALT_MAX             1500
+#define SNMPV3_ENGINEID_MAX         34
+#define SNMPV3_MSG_AUTH_PARAMS_MAX  16
+#define SNMPV3_ROUNDS               1048576
+#define SNMPV3_MAX_PW_LENGTH        64
+
+#define SNMPV3_TMP_ELEMS            4096 // 4096 = (256 (max pw length) * 64) / sizeof (u32)
+#define SNMPV3_HASH_ELEMS           8
+
+#define SNMPV3_MAX_SALT_ELEMS       512 // 512 * 4 = 2048 > 1500, also has to be multiple of 64
+#define SNMPV3_MAX_ENGINE_ELEMS     16  // 16 * 4 = 64 > 32, also has to be multiple of 64
+#define SNMPV3_MAX_PNUM_ELEMS       4   // 4 * 4 = 16 > 9
+
+typedef struct hmac_sha224_tmp
+{
+  u32 tmp[SNMPV3_TMP_ELEMS];
+  u32 h[SNMPV3_HASH_ELEMS];
+
+} hmac_sha224_tmp_t;
+
+typedef struct snmpv3
+{
+  u32 salt_buf[SNMPV3_MAX_SALT_ELEMS];
+  u32 salt_len;
+
+  u32 engineID_buf[SNMPV3_MAX_ENGINE_ELEMS];
+  u32 engineID_len;
+
+  u32 packet_number[SNMPV3_MAX_PNUM_ELEMS];
+
+} snmpv3_t;
+
+KERNEL_FQ void m26700_init (KERN_ATTR_TMPS_ESALT (hmac_sha224_tmp_t, snmpv3_t))
+{
+  /**
+   * modifier
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= gid_max) return;
+
+  /**
+   * base
+   */
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  u32 w[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
+  {
+    w[idx] = pws[gid].i[idx];
+  }
+
+  u8 *src_ptr = (u8 *) w;
+
+  // password 64 times, also swapped
+
+  u32 dst_buf[16];
+
+  u8 *dst_ptr = (u8 *) dst_buf;
+
+  int tmp_idx = 0;
+
+  for (int i = 0; i < 64; i++)
+  {
+    for (int j = 0; j < pw_len; j++)
+    {
+      const int dst_idx = tmp_idx & 63;
+
+      dst_ptr[dst_idx] = src_ptr[j];
+
+      // write to global memory every time 64 byte are written into cache
+
+      if (dst_idx == 63)
+      {
+        const int tmp_idx4 = (tmp_idx - 63) / 4;
+
+        tmps[gid].tmp[tmp_idx4 +  0] = hc_swap32_S (dst_buf[ 0]);
+        tmps[gid].tmp[tmp_idx4 +  1] = hc_swap32_S (dst_buf[ 1]);
+        tmps[gid].tmp[tmp_idx4 +  2] = hc_swap32_S (dst_buf[ 2]);
+        tmps[gid].tmp[tmp_idx4 +  3] = hc_swap32_S (dst_buf[ 3]);
+        tmps[gid].tmp[tmp_idx4 +  4] = hc_swap32_S (dst_buf[ 4]);
+        tmps[gid].tmp[tmp_idx4 +  5] = hc_swap32_S (dst_buf[ 5]);
+        tmps[gid].tmp[tmp_idx4 +  6] = hc_swap32_S (dst_buf[ 6]);
+        tmps[gid].tmp[tmp_idx4 +  7] = hc_swap32_S (dst_buf[ 7]);
+        tmps[gid].tmp[tmp_idx4 +  8] = hc_swap32_S (dst_buf[ 8]);
+        tmps[gid].tmp[tmp_idx4 +  9] = hc_swap32_S (dst_buf[ 9]);
+        tmps[gid].tmp[tmp_idx4 + 10] = hc_swap32_S (dst_buf[10]);
+        tmps[gid].tmp[tmp_idx4 + 11] = hc_swap32_S (dst_buf[11]);
+        tmps[gid].tmp[tmp_idx4 + 12] = hc_swap32_S (dst_buf[12]);
+        tmps[gid].tmp[tmp_idx4 + 13] = hc_swap32_S (dst_buf[13]);
+        tmps[gid].tmp[tmp_idx4 + 14] = hc_swap32_S (dst_buf[14]);
+        tmps[gid].tmp[tmp_idx4 + 15] = hc_swap32_S (dst_buf[15]);
+      }
+
+      tmp_idx++;
+    }
+  }
+
+  // hash
+
+  tmps[gid].h[0] = SHA224M_A;
+  tmps[gid].h[1] = SHA224M_B;
+  tmps[gid].h[2] = SHA224M_C;
+  tmps[gid].h[3] = SHA224M_D;
+  tmps[gid].h[4] = SHA224M_E;
+  tmps[gid].h[5] = SHA224M_F;
+  tmps[gid].h[6] = SHA224M_G;
+  tmps[gid].h[7] = SHA224M_H;
+}
+
+KERNEL_FQ void m26700_loop (KERN_ATTR_TMPS_ESALT (hmac_sha224_tmp_t, snmpv3_t))
+{
+  /**
+   * base
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= gid_max) return;
+
+  u32 h[8];
+
+  h[0] = tmps[gid].h[0];
+  h[1] = tmps[gid].h[1];
+  h[2] = tmps[gid].h[2];
+  h[3] = tmps[gid].h[3];
+  h[4] = tmps[gid].h[4];
+  h[5] = tmps[gid].h[5];
+  h[6] = tmps[gid].h[6];
+  h[7] = tmps[gid].h[7];
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  const int pw_len64 = pw_len * 64;
+
+  #define SNMPV3_TMP_ELEMS_OPT 1024 // 1024 = (64 max pw length * 64) / sizeof (u32)
+                                    // for pw length > 64 we use global memory reads
+
+  if (pw_len < 64)
+  {
+    u32 tmp[SNMPV3_TMP_ELEMS_OPT];
+
+    for (int i = 0; i < pw_len64 / 4; i++)
+    {
+      tmp[i] = tmps[gid].tmp[i];
+    }
+
+    for (int i = 0, j = loop_pos; i < loop_cnt; i += 64, j += 64)
+    {
+      const int idx = (j % pw_len64) / 4; // the optimization trick is to be able to do this
+
+      u32 w0[4];
+      u32 w1[4];
+      u32 w2[4];
+      u32 w3[4];
+
+      w0[0] = tmp[idx +  0];
+      w0[1] = tmp[idx +  1];
+      w0[2] = tmp[idx +  2];
+      w0[3] = tmp[idx +  3];
+      w1[0] = tmp[idx +  4];
+      w1[1] = tmp[idx +  5];
+      w1[2] = tmp[idx +  6];
+      w1[3] = tmp[idx +  7];
+      w2[0] = tmp[idx +  8];
+      w2[1] = tmp[idx +  9];
+      w2[2] = tmp[idx + 10];
+      w2[3] = tmp[idx + 11];
+      w3[0] = tmp[idx + 12];
+      w3[1] = tmp[idx + 13];
+      w3[2] = tmp[idx + 14];
+      w3[3] = tmp[idx + 15];
+
+      sha224_transform (w0, w1, w2, w3, h);
+    }
+  }
+  else
+  {
+    for (int i = 0, j = loop_pos; i < loop_cnt; i += 64, j += 64)
+    {
+      const int idx = (j % pw_len64) / 4; // the optimization trick is to be able to do this
+
+      u32 w0[4];
+      u32 w1[4];
+      u32 w2[4];
+      u32 w3[4];
+
+      w0[0] = tmps[gid].tmp[idx +  0];
+      w0[1] = tmps[gid].tmp[idx +  1];
+      w0[2] = tmps[gid].tmp[idx +  2];
+      w0[3] = tmps[gid].tmp[idx +  3];
+      w1[0] = tmps[gid].tmp[idx +  4];
+      w1[1] = tmps[gid].tmp[idx +  5];
+      w1[2] = tmps[gid].tmp[idx +  6];
+      w1[3] = tmps[gid].tmp[idx +  7];
+      w2[0] = tmps[gid].tmp[idx +  8];
+      w2[1] = tmps[gid].tmp[idx +  9];
+      w2[2] = tmps[gid].tmp[idx + 10];
+      w2[3] = tmps[gid].tmp[idx + 11];
+      w3[0] = tmps[gid].tmp[idx + 12];
+      w3[1] = tmps[gid].tmp[idx + 13];
+      w3[2] = tmps[gid].tmp[idx + 14];
+      w3[3] = tmps[gid].tmp[idx + 15];
+
+      sha224_transform (w0, w1, w2, w3, h);
+    }
+  }
+
+  tmps[gid].h[0] = h[0];
+  tmps[gid].h[1] = h[1];
+  tmps[gid].h[2] = h[2];
+  tmps[gid].h[3] = h[3];
+  tmps[gid].h[4] = h[4];
+  tmps[gid].h[5] = h[5];
+  tmps[gid].h[6] = h[6];
+  tmps[gid].h[7] = h[7];
+}
+
+KERNEL_FQ void m26700_comp (KERN_ATTR_TMPS_ESALT (hmac_sha224_tmp_t, snmpv3_t))
+{
+  /**
+   * modifier
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= gid_max) return;
+
+  u32 w0[4];
+  u32 w1[4];
+  u32 w2[4];
+  u32 w3[4];
+
+  w0[0] = 0x80000000;
+  w0[1] = 0;
+  w0[2] = 0;
+  w0[3] = 0;
+  w1[0] = 0;
+  w1[1] = 0;
+  w1[2] = 0;
+  w1[3] = 0;
+  w2[0] = 0;
+  w2[1] = 0;
+  w2[2] = 0;
+  w2[3] = 0;
+  w3[0] = 0;
+  w3[1] = 0;
+  w3[2] = 0;
+  w3[3] = 1048576 * 8;
+
+  u32 h[8];
+
+  h[0] = tmps[gid].h[0];
+  h[1] = tmps[gid].h[1];
+  h[2] = tmps[gid].h[2];
+  h[3] = tmps[gid].h[3];
+  h[4] = tmps[gid].h[4];
+  h[5] = tmps[gid].h[5];
+  h[6] = tmps[gid].h[6];
+  h[7] = tmps[gid].h[7];
+
+  sha224_transform (w0, w1, w2, w3, h);
+
+  sha224_ctx_t ctx;
+
+  sha224_init (&ctx);
+
+  u32 w[16];
+
+  w[ 0] = h[0];
+  w[ 1] = h[1];
+  w[ 2] = h[2];
+  w[ 3] = h[3];
+  w[ 4] = h[4];
+  w[ 5] = h[5];
+  w[ 6] = h[6];
+  w[ 7] = 0;
+  w[ 8] = 0;
+  w[ 9] = 0;
+  w[10] = 0;
+  w[11] = 0;
+  w[12] = 0;
+  w[13] = 0;
+  w[14] = 0;
+  w[15] = 0;
+
+  sha224_update (&ctx, w, 28);
+
+  sha224_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].engineID_buf, esalt_bufs[DIGESTS_OFFSET].engineID_len);
+
+  w[ 0] = h[0];
+  w[ 1] = h[1];
+  w[ 2] = h[2];
+  w[ 3] = h[3];
+  w[ 4] = h[4];
+  w[ 5] = h[5];
+  w[ 6] = h[6];
+  w[ 7] = 0;
+  w[ 8] = 0;
+  w[ 9] = 0;
+  w[10] = 0;
+  w[11] = 0;
+  w[12] = 0;
+  w[13] = 0;
+  w[14] = 0;
+  w[15] = 0;
+
+  sha224_update (&ctx, w, 28);
+
+  sha224_final (&ctx);
+
+  w[ 0] = ctx.h[0];
+  w[ 1] = ctx.h[1];
+  w[ 2] = ctx.h[2];
+  w[ 3] = ctx.h[3];
+  w[ 4] = ctx.h[4];
+  w[ 5] = ctx.h[5];
+  w[ 6] = ctx.h[6];
+  w[ 7] = 0;
+  w[ 8] = 0;
+  w[ 9] = 0;
+  w[10] = 0;
+  w[11] = 0;
+  w[12] = 0;
+  w[13] = 0;
+  w[14] = 0;
+  w[15] = 0;
+
+  sha224_hmac_ctx_t hmac_ctx;
+
+  sha224_hmac_init (&hmac_ctx, w, 28);
+
+  sha224_hmac_update_global_swap (&hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len);
+
+  sha224_hmac_final (&hmac_ctx);
+
+  const u32 r0 = hmac_ctx.opad.h[DGST_R0];
+  const u32 r1 = hmac_ctx.opad.h[DGST_R1];
+  const u32 r2 = hmac_ctx.opad.h[DGST_R2];
+  const u32 r3 = hmac_ctx.opad.h[DGST_R3];
+
+  #define il_pos 0
+
+  #ifdef KERNEL_STATIC
+  #include COMPARE_M
+  #endif
+}
diff --git a/OpenCL/m26800-pure.cl b/OpenCL/m26800-pure.cl
new file mode 100644
index 000000000..9b37fb5d0
--- /dev/null
+++ b/OpenCL/m26800-pure.cl
@@ -0,0 +1,369 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+//#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include "inc_vendor.h"
+#include "inc_types.h"
+#include "inc_platform.cl"
+#include "inc_common.cl"
+#include "inc_simd.cl"
+#include "inc_hash_sha256.cl"
+#endif
+
+#define COMPARE_S "inc_comp_single.cl"
+#define COMPARE_M "inc_comp_multi.cl"
+
+#define SNMPV3_SALT_MAX             1500
+#define SNMPV3_ENGINEID_MAX         34
+#define SNMPV3_MSG_AUTH_PARAMS_MAX  24
+#define SNMPV3_ROUNDS               1048576
+#define SNMPV3_MAX_PW_LENGTH        64
+
+#define SNMPV3_TMP_ELEMS            4096 // 4096 = (256 (max pw length) * 64) / sizeof (u32)
+#define SNMPV3_HASH_ELEMS           8
+
+#define SNMPV3_MAX_SALT_ELEMS       512 // 512 * 4 = 2048 > 1500, also has to be multiple of 64
+#define SNMPV3_MAX_ENGINE_ELEMS     16  // 16 * 4 = 64 > 32, also has to be multiple of 64
+#define SNMPV3_MAX_PNUM_ELEMS       4   // 4 * 4 = 16 > 9
+
+typedef struct hmac_sha256_tmp
+{
+  u32 tmp[SNMPV3_TMP_ELEMS];
+  u32 h[SNMPV3_HASH_ELEMS];
+
+} hmac_sha256_tmp_t;
+
+typedef struct snmpv3
+{
+  u32 salt_buf[SNMPV3_MAX_SALT_ELEMS];
+  u32 salt_len;
+
+  u32 engineID_buf[SNMPV3_MAX_ENGINE_ELEMS];
+  u32 engineID_len;
+
+  u32 packet_number[SNMPV3_MAX_PNUM_ELEMS];
+
+} snmpv3_t;
+
+KERNEL_FQ void m26800_init (KERN_ATTR_TMPS_ESALT (hmac_sha256_tmp_t, snmpv3_t))
+{
+  /**
+   * modifier
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= gid_max) return;
+
+  /**
+   * base
+   */
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  u32 w[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
+  {
+    w[idx] = pws[gid].i[idx];
+  }
+
+  u8 *src_ptr = (u8 *) w;
+
+  // password 64 times, also swapped
+
+  u32 dst_buf[16];
+
+  u8 *dst_ptr = (u8 *) dst_buf;
+
+  int tmp_idx = 0;
+
+  for (int i = 0; i < 64; i++)
+  {
+    for (int j = 0; j < pw_len; j++)
+    {
+      const int dst_idx = tmp_idx & 63;
+
+      dst_ptr[dst_idx] = src_ptr[j];
+
+      // write to global memory every time 64 byte are written into cache
+
+      if (dst_idx == 63)
+      {
+        const int tmp_idx4 = (tmp_idx - 63) / 4;
+
+        tmps[gid].tmp[tmp_idx4 +  0] = hc_swap32_S (dst_buf[ 0]);
+        tmps[gid].tmp[tmp_idx4 +  1] = hc_swap32_S (dst_buf[ 1]);
+        tmps[gid].tmp[tmp_idx4 +  2] = hc_swap32_S (dst_buf[ 2]);
+        tmps[gid].tmp[tmp_idx4 +  3] = hc_swap32_S (dst_buf[ 3]);
+        tmps[gid].tmp[tmp_idx4 +  4] = hc_swap32_S (dst_buf[ 4]);
+        tmps[gid].tmp[tmp_idx4 +  5] = hc_swap32_S (dst_buf[ 5]);
+        tmps[gid].tmp[tmp_idx4 +  6] = hc_swap32_S (dst_buf[ 6]);
+        tmps[gid].tmp[tmp_idx4 +  7] = hc_swap32_S (dst_buf[ 7]);
+        tmps[gid].tmp[tmp_idx4 +  8] = hc_swap32_S (dst_buf[ 8]);
+        tmps[gid].tmp[tmp_idx4 +  9] = hc_swap32_S (dst_buf[ 9]);
+        tmps[gid].tmp[tmp_idx4 + 10] = hc_swap32_S (dst_buf[10]);
+        tmps[gid].tmp[tmp_idx4 + 11] = hc_swap32_S (dst_buf[11]);
+        tmps[gid].tmp[tmp_idx4 + 12] = hc_swap32_S (dst_buf[12]);
+        tmps[gid].tmp[tmp_idx4 + 13] = hc_swap32_S (dst_buf[13]);
+        tmps[gid].tmp[tmp_idx4 + 14] = hc_swap32_S (dst_buf[14]);
+        tmps[gid].tmp[tmp_idx4 + 15] = hc_swap32_S (dst_buf[15]);
+      }
+
+      tmp_idx++;
+    }
+  }
+
+  // hash
+
+  tmps[gid].h[0] = SHA256M_A;
+  tmps[gid].h[1] = SHA256M_B;
+  tmps[gid].h[2] = SHA256M_C;
+  tmps[gid].h[3] = SHA256M_D;
+  tmps[gid].h[4] = SHA256M_E;
+  tmps[gid].h[5] = SHA256M_F;
+  tmps[gid].h[6] = SHA256M_G;
+  tmps[gid].h[7] = SHA256M_H;
+}
+
+KERNEL_FQ void m26800_loop (KERN_ATTR_TMPS_ESALT (hmac_sha256_tmp_t, snmpv3_t))
+{
+  /**
+   * base
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= gid_max) return;
+
+  u32 h[8];
+
+  h[0] = tmps[gid].h[0];
+  h[1] = tmps[gid].h[1];
+  h[2] = tmps[gid].h[2];
+  h[3] = tmps[gid].h[3];
+  h[4] = tmps[gid].h[4];
+  h[5] = tmps[gid].h[5];
+  h[6] = tmps[gid].h[6];
+  h[7] = tmps[gid].h[7];
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  const int pw_len64 = pw_len * 64;
+
+  #define SNMPV3_TMP_ELEMS_OPT 1024 // 1024 = (64 max pw length * 64) / sizeof (u32)
+                                    // for pw length > 64 we use global memory reads
+
+  if (pw_len < 64)
+  {
+    u32 tmp[SNMPV3_TMP_ELEMS_OPT];
+
+    for (int i = 0; i < pw_len64 / 4; i++)
+    {
+      tmp[i] = tmps[gid].tmp[i];
+    }
+
+    for (int i = 0, j = loop_pos; i < loop_cnt; i += 64, j += 64)
+    {
+      const int idx = (j % pw_len64) / 4; // the optimization trick is to be able to do this
+
+      u32 w0[4];
+      u32 w1[4];
+      u32 w2[4];
+      u32 w3[4];
+
+      w0[0] = tmp[idx +  0];
+      w0[1] = tmp[idx +  1];
+      w0[2] = tmp[idx +  2];
+      w0[3] = tmp[idx +  3];
+      w1[0] = tmp[idx +  4];
+      w1[1] = tmp[idx +  5];
+      w1[2] = tmp[idx +  6];
+      w1[3] = tmp[idx +  7];
+      w2[0] = tmp[idx +  8];
+      w2[1] = tmp[idx +  9];
+      w2[2] = tmp[idx + 10];
+      w2[3] = tmp[idx + 11];
+      w3[0] = tmp[idx + 12];
+      w3[1] = tmp[idx + 13];
+      w3[2] = tmp[idx + 14];
+      w3[3] = tmp[idx + 15];
+
+      sha256_transform (w0, w1, w2, w3, h);
+    }
+  }
+  else
+  {
+    for (int i = 0, j = loop_pos; i < loop_cnt; i += 64, j += 64)
+    {
+      const int idx = (j % pw_len64) / 4; // the optimization trick is to be able to do this
+
+      u32 w0[4];
+      u32 w1[4];
+      u32 w2[4];
+      u32 w3[4];
+
+      w0[0] = tmps[gid].tmp[idx +  0];
+      w0[1] = tmps[gid].tmp[idx +  1];
+      w0[2] = tmps[gid].tmp[idx +  2];
+      w0[3] = tmps[gid].tmp[idx +  3];
+      w1[0] = tmps[gid].tmp[idx +  4];
+      w1[1] = tmps[gid].tmp[idx +  5];
+      w1[2] = tmps[gid].tmp[idx +  6];
+      w1[3] = tmps[gid].tmp[idx +  7];
+      w2[0] = tmps[gid].tmp[idx +  8];
+      w2[1] = tmps[gid].tmp[idx +  9];
+      w2[2] = tmps[gid].tmp[idx + 10];
+      w2[3] = tmps[gid].tmp[idx + 11];
+      w3[0] = tmps[gid].tmp[idx + 12];
+      w3[1] = tmps[gid].tmp[idx + 13];
+      w3[2] = tmps[gid].tmp[idx + 14];
+      w3[3] = tmps[gid].tmp[idx + 15];
+
+      sha256_transform (w0, w1, w2, w3, h);
+    }
+  }
+
+  tmps[gid].h[0] = h[0];
+  tmps[gid].h[1] = h[1];
+  tmps[gid].h[2] = h[2];
+  tmps[gid].h[3] = h[3];
+  tmps[gid].h[4] = h[4];
+  tmps[gid].h[5] = h[5];
+  tmps[gid].h[6] = h[6];
+  tmps[gid].h[7] = h[7];
+}
+
+KERNEL_FQ void m26800_comp (KERN_ATTR_TMPS_ESALT (hmac_sha256_tmp_t, snmpv3_t))
+{
+  /**
+   * modifier
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= gid_max) return;
+
+  u32 w0[4];
+  u32 w1[4];
+  u32 w2[4];
+  u32 w3[4];
+
+  w0[0] = 0x80000000;
+  w0[1] = 0;
+  w0[2] = 0;
+  w0[3] = 0;
+  w1[0] = 0;
+  w1[1] = 0;
+  w1[2] = 0;
+  w1[3] = 0;
+  w2[0] = 0;
+  w2[1] = 0;
+  w2[2] = 0;
+  w2[3] = 0;
+  w3[0] = 0;
+  w3[1] = 0;
+  w3[2] = 0;
+  w3[3] = 1048576 * 8;
+
+  u32 h[8];
+
+  h[0] = tmps[gid].h[0];
+  h[1] = tmps[gid].h[1];
+  h[2] = tmps[gid].h[2];
+  h[3] = tmps[gid].h[3];
+  h[4] = tmps[gid].h[4];
+  h[5] = tmps[gid].h[5];
+  h[6] = tmps[gid].h[6];
+  h[7] = tmps[gid].h[7];
+
+  sha256_transform (w0, w1, w2, w3, h);
+
+  sha256_ctx_t ctx;
+
+  sha256_init (&ctx);
+
+  u32 w[16];
+
+  w[ 0] = h[0];
+  w[ 1] = h[1];
+  w[ 2] = h[2];
+  w[ 3] = h[3];
+  w[ 4] = h[4];
+  w[ 5] = h[5];
+  w[ 6] = h[6];
+  w[ 7] = h[7];
+  w[ 8] = 0;
+  w[ 9] = 0;
+  w[10] = 0;
+  w[11] = 0;
+  w[12] = 0;
+  w[13] = 0;
+  w[14] = 0;
+  w[15] = 0;
+
+  sha256_update (&ctx, w, 32);
+
+  sha256_update_global_swap (&ctx, esalt_bufs[DIGESTS_OFFSET].engineID_buf, esalt_bufs[DIGESTS_OFFSET].engineID_len);
+
+  w[ 0] = h[0];
+  w[ 1] = h[1];
+  w[ 2] = h[2];
+  w[ 3] = h[3];
+  w[ 4] = h[4];
+  w[ 5] = h[5];
+  w[ 6] = h[6];
+  w[ 7] = h[7];
+  w[ 8] = 0;
+  w[ 9] = 0;
+  w[10] = 0;
+  w[11] = 0;
+  w[12] = 0;
+  w[13] = 0;
+  w[14] = 0;
+  w[15] = 0;
+
+  sha256_update (&ctx, w, 32);
+
+  sha256_final (&ctx);
+
+  w[ 0] = ctx.h[0];
+  w[ 1] = ctx.h[1];
+  w[ 2] = ctx.h[2];
+  w[ 3] = ctx.h[3];
+  w[ 4] = ctx.h[4];
+  w[ 5] = ctx.h[5];
+  w[ 6] = ctx.h[6];
+  w[ 7] = ctx.h[7];
+  w[ 8] = 0;
+  w[ 9] = 0;
+  w[10] = 0;
+  w[11] = 0;
+  w[12] = 0;
+  w[13] = 0;
+  w[14] = 0;
+  w[15] = 0;
+
+  sha256_hmac_ctx_t hmac_ctx;
+
+  sha256_hmac_init (&hmac_ctx, w, 32);
+
+  sha256_hmac_update_global_swap (&hmac_ctx, esalt_bufs[DIGESTS_OFFSET].salt_buf, esalt_bufs[DIGESTS_OFFSET].salt_len);
+
+  sha256_hmac_final (&hmac_ctx);
+
+  const u32 r0 = hmac_ctx.opad.h[DGST_R0];
+  const u32 r1 = hmac_ctx.opad.h[DGST_R1];
+  const u32 r2 = hmac_ctx.opad.h[DGST_R2];
+  const u32 r3 = hmac_ctx.opad.h[DGST_R3];
+
+  #define il_pos 0
+
+  #ifdef KERNEL_STATIC
+  #include COMPARE_M
+  #endif
+}
diff --git a/docs/changes.txt b/docs/changes.txt
index e864683d6..6a5bcf368 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -11,9 +11,12 @@
 ## Bugs
 ##
 
+- Fixed buffer overflow in Stargazer Stellar Wallet XLM module in hash_encode() if a hash was cracked
 - Fixed autotune unitialized tmps variable for slow hashes by calling _init kernel before calling _loop kernel
 - Fixed datatype in function sha384_hmac_init_vector_128() that could come into effect if vector datatype was manually set
 - Fixed false negative in all VeraCrypt hash-modes if both conditions are met: 1. use CPU for cracking and 2. PIM range was used
+- Fixed out-of-boundary read in input_tokenizer() if the signature in the hash is longer than the length of the plugins' signature constant
+- Fixed out-of-boundary read in Stuffit5 module in hash_decode()
 
 ##
 ## Improvements
@@ -46,9 +49,11 @@
 ## Algorithms
 ##
 
-- Added hash-mode: SNMPv3 HMAC-SHA1-96
-- Added hash-mode: SNMPv3 HMAC-MD5-96
 - Added hash-mode: SNMPv3 HMAC-MD5-96/HMAC-SHA1-96
+- Added hash-mode: SNMPv3 HMAC-MD5-96
+- Added hash-mode: SNMPv3 HMAC-SHA1-96
+- Added hash-mode: SNMPv3 HMAC-SHA224-128
+- Added hash-mode: SNMPv3 HMAC-SHA256-192
 
 * changes v6.2.2 -> v6.2.3
 
diff --git a/docs/readme.txt b/docs/readme.txt
index d70edfcb7..f05434151 100644
--- a/docs/readme.txt
+++ b/docs/readme.txt
@@ -155,9 +155,11 @@ NVIDIA GPUs require "NVIDIA Driver" (440.64 or later) and "CUDA Toolkit" (9.0 or
 - SIP digest authentication (MD5)
 - IKE-PSK MD5
 - IKE-PSK SHA1
-- SNMPv3 HMAC-MD5-96
 - SNMPv3 HMAC-MD5-96/HMAC-SHA1-96
+- SNMPv3 HMAC-MD5-96
 - SNMPv3 HMAC-SHA1-96
+- SNMPv3 HMAC-SHA256-192
+- SNMPv3 HMAC-SHA224-128
 - WPA-EAPOL-PBKDF2
 - WPA-EAPOL-PMK
 - WPA-PBKDF2-PMKID+EAPOL
diff --git a/docs/rules.txt b/docs/rules.txt
index b2621bba4..59233d1ad 100644
--- a/docs/rules.txt
+++ b/docs/rules.txt
@@ -5,6 +5,7 @@
 #define RULE_OP_MANGLE_UREST_LFIRST     'C' // upper case all chars, lower case 1st
 #define RULE_OP_MANGLE_TREST            't' // switch the case of each char
 #define RULE_OP_MANGLE_TOGGLE_AT        'T' // switch the case of each char on pos N
+#define RULE_OP_MANGLE_TOGGLE_AT_SEP    '3' // switch the case of the first letter after occurrence N of char X
 #define RULE_OP_MANGLE_REVERSE          'r' // reverse word
 #define RULE_OP_MANGLE_DUPEWORD         'd' // append word to itself
 #define RULE_OP_MANGLE_DUPEWORD_TIMES   'p' // append word to itself N times
diff --git a/include/types.h b/include/types.h
index a8564cc9d..0cb440e3a 100644
--- a/include/types.h
+++ b/include/types.h
@@ -295,6 +295,7 @@ typedef enum rule_functions
   RULE_OP_MANGLE_UREST_LFIRST    = 'C',
   RULE_OP_MANGLE_TREST           = 't',
   RULE_OP_MANGLE_TOGGLE_AT       = 'T',
+  RULE_OP_MANGLE_TOGGLE_AT_SEP   = '3',
   RULE_OP_MANGLE_REVERSE         = 'r',
   RULE_OP_MANGLE_DUPEWORD        = 'd',
   RULE_OP_MANGLE_DUPEWORD_TIMES  = 'p',
diff --git a/src/autotune.c b/src/autotune.c
index 8941c71b9..9f2ac312c 100644
--- a/src/autotune.c
+++ b/src/autotune.c
@@ -10,7 +10,7 @@
 #include "status.h"
 #include "autotune.h"
 
-static double try_run (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kernel_accel, const u32 kernel_loops)
+static double try_run (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kernel_accel, const u32 kernel_loops, const u32 kernel_threads)
 {
   hashconfig_t   *hashconfig   = hashcat_ctx->hashconfig;
   user_options_t *user_options = hashcat_ctx->user_options;
@@ -19,7 +19,9 @@ static double try_run (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_par
   device_param->kernel_params_buf32[29] = kernel_loops; // not a bug, both need to be set
   device_param->kernel_params_buf32[30] = kernel_loops; // because there's two variables for inner iters for slow and fast hashes
 
-  u32 kernel_power_try = device_param->hardware_power * kernel_accel;
+  const u32 hardware_power = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * kernel_threads;
+
+  u32 kernel_power_try = hardware_power * kernel_accel;
 
   if (user_options->attack_mode == ATTACK_MODE_ASSOCIATION)
   {
@@ -33,6 +35,10 @@ static double try_run (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_par
     }
   }
 
+  const u32 kernel_threads_sav = device_param->kernel_threads;
+
+  device_param->kernel_threads = kernel_threads;
+
   const double spin_damp_sav = device_param->spin_damp;
 
   device_param->spin_damp = 0;
@@ -50,71 +56,51 @@ static double try_run (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_par
   }
   else
   {
-    run_kernel (hashcat_ctx, device_param, KERN_RUN_1, 0, kernel_power_try, true, 0);
-
-    if (hashconfig->opts_type & OPTS_TYPE_LOOP_PREPARE)
-    {
-      run_kernel (hashcat_ctx, device_param, KERN_RUN_2P, 0, kernel_power_try, true, 0);
-    }
-
     run_kernel (hashcat_ctx, device_param, KERN_RUN_2, 0, kernel_power_try, true, 0);
   }
 
   device_param->spin_damp = spin_damp_sav;
 
+  device_param->kernel_threads = kernel_threads_sav;
+
   const double exec_msec_prev = get_avg_exec_time (device_param, 1);
 
   return exec_msec_prev;
 }
 
-/*
-static double try_run_preferred (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kernel_accel, const u32 kernel_loops)
+static double try_run_times (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kernel_accel, const u32 kernel_loops, const u32 kernel_threads, const int times)
 {
-  hashconfig_t *hashconfig = hashcat_ctx->hashconfig;
-
-  device_param->kernel_params_buf32[28] = 0;
-  device_param->kernel_params_buf32[29] = kernel_loops; // not a bug, both need to be set
-  device_param->kernel_params_buf32[30] = kernel_loops; // because there's two variables for inner iters for slow and fast hashes
+  double exec_msec_best = try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops, kernel_threads);
 
-  const u32 kernel_power_try = device_param->hardware_power * kernel_accel;
-
-  const u32 kernel_threads_sav = device_param->kernel_threads;
-
-  const double spin_damp_sav = device_param->spin_damp;
-
-  device_param->spin_damp = 0;
-
-  if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+  for (int i = 1; i < times; i++)
   {
-    if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
-    {
-      device_param->kernel_threads = device_param->kernel_preferred_wgs_multiple1;
+    double exec_msec = try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops, kernel_threads);
 
-      run_kernel (hashcat_ctx, device_param, KERN_RUN_1, 0, kernel_power_try, true, 0);
-    }
-    else
-    {
-      device_param->kernel_threads = device_param->kernel_preferred_wgs_multiple4;
+    if (exec_msec > exec_msec_best) continue;
 
-      run_kernel (hashcat_ctx, device_param, KERN_RUN_4, 0, kernel_power_try, true, 0);
-    }
+    exec_msec_best = exec_msec;
   }
-  else
-  {
-    device_param->kernel_threads = device_param->kernel_preferred_wgs_multiple2;
 
-    run_kernel (hashcat_ctx, device_param, KERN_RUN_2, 0, kernel_power_try, true, 0);
-  }
+  return exec_msec_best;
+}
 
-  device_param->kernel_threads = kernel_threads_sav;
+static u32 previous_power_of_two (const u32 x)
+{
+  // https://stackoverflow.com/questions/2679815/previous-power-of-2
+  // really cool!
 
-  device_param->spin_damp = spin_damp_sav;
+  if (x == 0) return 0;
 
-  const double exec_msec_prev = get_avg_exec_time (device_param, 1);
+  u32 r = x;
 
-  return exec_msec_prev;
+  r |= (r >>  1);
+  r |= (r >>  2);
+  r |= (r >>  4);
+  r |= (r >>  8);
+  r |= (r >> 16);
+
+  return r - (r >> 1);
 }
-*/
 
 static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
 {
@@ -131,9 +117,57 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
   const u32 kernel_loops_min = device_param->kernel_loops_min;
   const u32 kernel_loops_max = device_param->kernel_loops_max;
 
+  const u32 kernel_threads_min = device_param->kernel_threads_min;
+  const u32 kernel_threads_max = device_param->kernel_threads_max;
+
   u32 kernel_accel = kernel_accel_min;
   u32 kernel_loops = kernel_loops_min;
 
+  // for the threads we take as initial value what we receive from the runtime
+  // but is only to start with something, we will fine tune this value as soon as we have our workload specified
+  // this thread limiting is also performed insinde run_kernel() so we need to redo it here, too
+
+  u32 kernel_wgs = 0;
+  u32 kernel_wgs_multiple = 0;
+
+  if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+  {
+    if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
+    {
+      kernel_wgs = device_param->kernel_wgs1;
+
+      kernel_wgs_multiple = device_param->kernel_preferred_wgs_multiple1;
+    }
+    else
+    {
+      kernel_wgs = device_param->kernel_wgs4;
+
+      kernel_wgs_multiple = device_param->kernel_preferred_wgs_multiple4;
+    }
+  }
+  else
+  {
+    kernel_wgs = device_param->kernel_wgs2;
+
+    kernel_wgs_multiple = device_param->kernel_preferred_wgs_multiple2;
+  }
+
+  u32 kernel_threads = kernel_threads_max;
+
+  if ((kernel_wgs >= kernel_threads_min) && (kernel_wgs <= kernel_threads_max))
+  {
+    kernel_threads = kernel_wgs;
+  }
+
+  // having a value power of 2 makes it easier to divide
+
+  const u32 kernel_threads_p2 = previous_power_of_two (kernel_threads);
+
+  if ((kernel_threads_p2 >= kernel_threads_min) && (kernel_threads_p2 <= kernel_threads_max))
+  {
+    kernel_threads = kernel_threads_p2;
+  }
+
   // in this case the user specified a fixed -n and -u on the commandline
   // no way to tune anything
   // but we need to run a few caching rounds
@@ -149,10 +183,10 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
     if (hashconfig->warmup_disable == false)
     {
-      try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops);
-      try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops);
-      try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops);
-      try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops);
+      try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops, kernel_threads);
+      try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops, kernel_threads);
+      try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops, kernel_threads);
+      try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops, kernel_threads);
     }
 
     #endif
@@ -206,13 +240,37 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
       }
     }
 
+    // we also need to initialize some values using kernels
+
+    if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
+    {
+      // nothing to do
+    }
+    else
+    {
+      const u32 kernel_threads_sav = device_param->kernel_threads;
+
+      device_param->kernel_threads = device_param->kernel_wgs1;
+
+      run_kernel (hashcat_ctx, device_param, KERN_RUN_1, 0, kernel_power_max, false, 0);
+
+      if (hashconfig->opts_type & OPTS_TYPE_LOOP_PREPARE)
+      {
+        device_param->kernel_threads = device_param->kernel_wgs2p;
+
+        run_kernel (hashcat_ctx, device_param, KERN_RUN_2P, 0, kernel_power_max, false, 0);
+      }
+
+      device_param->kernel_threads = kernel_threads_sav;
+    }
+
     // Do a pre-autotune test run to find out if kernel runtime is above some TDR limit
 
     u32 kernel_loops_max_reduced = kernel_loops_max;
 
     if (true)
     {
-      double exec_msec = try_run (hashcat_ctx, device_param, kernel_accel_min, kernel_loops_min);
+      double exec_msec = try_run (hashcat_ctx, device_param, kernel_accel_min, kernel_loops_min, kernel_threads);
 
       if (exec_msec > 2000)
       {
@@ -221,7 +279,7 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
         return -1;
       }
 
-      exec_msec = try_run (hashcat_ctx, device_param, kernel_accel_min, kernel_loops_min);
+      exec_msec = try_run (hashcat_ctx, device_param, kernel_accel_min, kernel_loops_min, kernel_threads);
 
       const u32 mm = kernel_loops_max / kernel_loops_min;
 
@@ -241,16 +299,16 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
       {
         if (kernel_loops > kernel_loops_max_reduced) continue;
 
-        double exec_msec = try_run (hashcat_ctx, device_param, kernel_accel_min, kernel_loops);
+        double exec_msec = try_run_times (hashcat_ctx, device_param, kernel_accel_min, kernel_loops, kernel_threads, 1);
 
         if (exec_msec < target_msec) break;
       }
     }
 
-    // now the same for kernel-accel but with the new kernel-loops from previous loop set
-
     #define STEPS_CNT 16
 
+    // now the same for kernel-accel but with the new kernel-loops from previous loop set
+
     if (kernel_accel_min < kernel_accel_max)
     {
       for (int i = 0; i < STEPS_CNT; i++)
@@ -260,7 +318,7 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
         if (kernel_accel_try < kernel_accel_min) continue;
         if (kernel_accel_try > kernel_accel_max) break;
 
-        double exec_msec = try_run (hashcat_ctx, device_param, kernel_accel_try, kernel_loops);
+        double exec_msec = try_run_times (hashcat_ctx, device_param, kernel_accel_try, kernel_loops, kernel_threads, 1);
 
         if (exec_msec > target_msec) break;
 
@@ -276,7 +334,7 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
       const u32 kernel_accel_orig = kernel_accel;
       const u32 kernel_loops_orig = kernel_loops;
 
-      double exec_msec_prev = try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops);
+      double exec_msec_prev = try_run_times (hashcat_ctx, device_param, kernel_accel, kernel_loops, kernel_threads, 1);
 
       for (int i = 1; i < STEPS_CNT; i++)
       {
@@ -291,7 +349,7 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
         // do a real test
 
-        const double exec_msec = try_run (hashcat_ctx, device_param, kernel_accel_try, kernel_loops_try);
+        const double exec_msec = try_run_times (hashcat_ctx, device_param, kernel_accel_try, kernel_loops_try, kernel_threads, 1);
 
         if (exec_msec_prev < exec_msec) break;
 
@@ -308,7 +366,7 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
       }
     }
 
-    double exec_msec_pre_final = try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops);
+    double exec_msec_pre_final = try_run_times (hashcat_ctx, device_param, kernel_accel, kernel_loops, kernel_threads, 1);
 
     const u32 exec_left = (const u32) (target_msec / exec_msec_pre_final);
 
@@ -323,46 +381,43 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
       kernel_accel *= exec_accel_min;
     }
 
-    // start finding best thread count is easier.
-    // it's either the preferred or the maximum thread count
+    // v6.2.4 new section: find thread count
+    // This is not as effective as it could be because of inaccurate kernel return timers
+    // But is better than fixed values
+    // Timers in this section are critical, so we rerun meassurements 3 times
 
-    /*
-    const u32 kernel_threads_min = device_param->kernel_threads_min;
-    const u32 kernel_threads_max = device_param->kernel_threads_max;
-
-    if (kernel_threads_min < kernel_threads_max)
+    if (kernel_threads_max > kernel_threads_min)
     {
-      const double exec_msec_max = try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops);
+      const u32 kernel_accel_orig   = kernel_accel;
+      const u32 kernel_threads_orig = kernel_threads;
 
-      u32 preferred_threads = 0;
+      double exec_msec_prev = try_run_times (hashcat_ctx, device_param, kernel_accel, kernel_loops, kernel_threads, 3);
 
-      if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
-      {
-        if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
-        {
-          preferred_threads = device_param->kernel_preferred_wgs_multiple1;
-        }
-        else
-        {
-          preferred_threads = device_param->kernel_preferred_wgs_multiple4;
-        }
-      }
-      else
+      for (int i = 1; i < STEPS_CNT; i++)
       {
-        preferred_threads = device_param->kernel_preferred_wgs_multiple2;
-      }
+        const u32 kernel_accel_try   = kernel_accel_orig   * (1U << i);
+        const u32 kernel_threads_try = kernel_threads_orig / (1U << i);
 
-      if ((preferred_threads >= kernel_threads_min) && (preferred_threads <= kernel_threads_max))
-      {
-        const double exec_msec_preferred = try_run_preferred (hashcat_ctx, device_param, kernel_accel, kernel_loops);
+        // since we do not modify total amount of workitems, we can (and need) to do increase kernel_accel_max
 
-        if (exec_msec_preferred < exec_msec_max)
-        {
-          device_param->kernel_threads = preferred_threads;
-        }
+        const u32 kernel_accel_max_try = kernel_accel_max * (1U << i);
+
+        if (kernel_accel_try > kernel_accel_max_try) break;
+
+        if (kernel_threads_try < kernel_threads_min) break;
+
+        if (kernel_threads_try % kernel_wgs_multiple) break; // this would just be waste of time
+
+        double exec_msec = try_run_times (hashcat_ctx, device_param, kernel_accel_try, kernel_loops, kernel_threads_try, 3);
+
+        if (exec_msec > exec_msec_prev) continue;
+
+        exec_msec_prev = exec_msec;
+
+        kernel_accel   = kernel_accel_try;
+        kernel_threads = kernel_threads_try;
       }
     }
-    */
   }
 
   // reset them fake words
@@ -428,8 +483,13 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
   // store
 
-  device_param->kernel_accel = kernel_accel;
-  device_param->kernel_loops = kernel_loops;
+  device_param->kernel_accel   = kernel_accel;
+  device_param->kernel_loops   = kernel_loops;
+  device_param->kernel_threads = kernel_threads;
+
+  const u32 hardware_power = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * device_param->kernel_threads;
+
+  device_param->hardware_power = hardware_power;
 
   const u32 kernel_power = device_param->hardware_power * device_param->kernel_accel;
 
diff --git a/src/backend.c b/src/backend.c
index 4801acf38..ec2890aca 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -5328,6 +5328,8 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
 
     if (hc_cuEventSynchronize (hashcat_ctx, device_param->cuda_event2) == -1) return -1;
 
+    if (hc_cuEventSynchronize (hashcat_ctx, device_param->cuda_event1) == -1) return -1;
+
     float exec_ms;
 
     if (hc_cuEventElapsedTime (hashcat_ctx, &exec_ms, device_param->cuda_event1, device_param->cuda_event2) == -1) return -1;
@@ -8979,6 +8981,13 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
           }
         }
 
+        if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
+        {
+          // they like this
+
+          device_param->kernel_preferred_wgs_multiple = 1;
+        }
+
         if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
         {
           if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_vendor_id == VENDOR_ID_AMD))
@@ -9997,61 +10006,6 @@ static int get_opencl_kernel_dynamic_local_mem_size (hashcat_ctx_t *hashcat_ctx,
   return 0;
 }
 
-static u32 get_kernel_threads (const hc_device_param_t *device_param)
-{
-  // this is an upper limit, a good start, since our strategy is to reduce thread counts only.
-
-  u32 kernel_threads_min = device_param->kernel_threads_min;
-  u32 kernel_threads_max = device_param->kernel_threads_max;
-
-  // the changes we do here are just optimizations, since the module always has priority.
-
-  const u32 device_maxworkgroup_size = (const u32) device_param->device_maxworkgroup_size;
-
-  kernel_threads_max = MIN (kernel_threads_max, device_maxworkgroup_size);
-
-  if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
-  {
-    // for all CPU we just do 1 ...
-
-    kernel_threads_max = MIN (kernel_threads_max, 1);
-  }
-  else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
-  {
-    // for GPU we need to distinguish by vendor
-
-    if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK)
-    {
-      kernel_threads_max = MIN (kernel_threads_max, 8);
-    }
-    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
-    {
-      if (device_param->kernel_preferred_wgs_multiple == 64)
-      {
-        // only older AMD GPUs with WaveFront size 64 benefit from this
-
-        kernel_threads_max = MIN (kernel_threads_max, device_param->kernel_preferred_wgs_multiple);
-      }
-    }
-    else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
-    {
-      if (device_param->kernel_preferred_wgs_multiple == 64)
-      {
-        // only older AMD GPUs with WaveFront size 64 benefit from this
-
-        kernel_threads_max = MIN (kernel_threads_max, device_param->kernel_preferred_wgs_multiple);
-      }
-    }
-  }
-
-  // this is intenionally! at this point, kernel_threads_min can be higher than kernel_threads_max.
-  // in this case we actually want kernel_threads_min selected.
-
-  const u32 kernel_threads = MAX (kernel_threads_min, kernel_threads_max);
-
-  return kernel_threads;
-}
-
 static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const char *kernel_name, char *source_file, char *cached_file, const char *build_options_buf, const bool cache_disable, cl_program *opencl_program, CUmodule *cuda_module, hipModule_t *hip_module)
 {
   const hashconfig_t    *hashconfig    = hashcat_ctx->hashconfig;
@@ -10090,8 +10044,6 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p
   if (cached == false)
   {
     #if defined (DEBUG)
-    const user_options_t *user_options = hashcat_ctx->user_options;
-
     if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s not found in cache. Please be patient...", device_param->device_id + 1, filename_from_filepath (cached_file));
     #endif
 
@@ -10344,7 +10296,7 @@ static bool load_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_p
       //hiprtc_options[1] = "--device-as-default-execution-space";
       //hiprtc_options[2] = "--gpu-architecture";
 
-      hc_asprintf (&hiprtc_options[0], "--gpu-max-threads-per-block=%d", (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : ((device_param->kernel_preferred_wgs_multiple == 64) ? 64 : KERNEL_THREADS_MAX));
+      hc_asprintf (&hiprtc_options[0], "--gpu-max-threads-per-block=%d", (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_threads_max);
 
       hiprtc_options[1] = "-nocudainc";
       hiprtc_options[2] = "-nocudalib";
@@ -10990,6 +10942,19 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       }
     }
 
+    // this seems to work always
+
+    if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
+    {
+      u32 native_threads = 1;
+
+      if ((native_threads >= device_param->kernel_threads_min) && (native_threads <= device_param->kernel_threads_max))
+      {
+        device_param->kernel_threads_min = native_threads;
+        device_param->kernel_threads_max = native_threads;
+      }
+    }
+
     /**
      * create context for each device
      */
@@ -11319,7 +11284,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       device_param->device_name,
       device_param->opencl_device_version,
       device_param->opencl_driver_version,
-      (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : ((device_param->kernel_preferred_wgs_multiple == 64) ? 64 : KERNEL_THREADS_MAX));
+      (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_threads_max);
 
     md5_ctx_t md5_ctx;
 
@@ -11654,7 +11619,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
         device_param->vector_width,
         hashconfig->kern_type,
         extra_value,
-        (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : ((device_param->kernel_preferred_wgs_multiple == 64) ? 64 : KERNEL_THREADS_MAX),
+        (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : device_param->kernel_threads_max,
         build_options_module_buf);
 
       md5_ctx_t md5_ctx;
@@ -14381,7 +14346,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
      * now everything that depends on threads and accel, basically dynamic workload
      */
 
-    u32 kernel_threads = get_kernel_threads (device_param);
+    //    u32 kernel_threads = get_kernel_threads (device_param);
 
     if (user_options->attack_mode == ATTACK_MODE_ASSOCIATION)
     {
@@ -14389,12 +14354,14 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       // in autotune. in this attack mode kernel_power is limited by salts_cnt so we
       // do not have a lot of options left.
 
-      kernel_threads = MIN (kernel_threads, 64);
+      device_param->kernel_threads_min = MIN (device_param->kernel_threads_min, 64);
+      device_param->kernel_threads_max = MIN (device_param->kernel_threads_max, 64);
     }
 
-    device_param->kernel_threads = kernel_threads;
+    //    device_param->kernel_threads = kernel_threads;
+    device_param->kernel_threads = 0;
 
-    device_param->hardware_power = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_processors) * kernel_threads;
+    device_param->hardware_power = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_processors) * device_param->kernel_threads_max;
 
     u32 kernel_accel_min = device_param->kernel_accel_min;
     u32 kernel_accel_max = device_param->kernel_accel_max;
@@ -14520,6 +14487,47 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
       if ((size_tmps  + EXTRA_SPACE) > device_param->device_maxmem_alloc) memory_limit_hit = 1;
       if ((size_hooks + EXTRA_SPACE) > device_param->device_maxmem_alloc) memory_limit_hit = 1;
 
+      // work around, for some reason apple opencl can't have buffers larger 2^31
+      // typically runs into trap 6
+      // maybe 32/64 bit problem affecting size_t?
+
+      if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE)
+      {
+        const size_t undocumented_single_allocation_apple = 0x7fffffff;
+
+        if (bitmap_ctx->bitmap_size > undocumented_single_allocation_apple) memory_limit_hit = 1;
+        if (bitmap_ctx->bitmap_size > undocumented_single_allocation_apple) memory_limit_hit = 1;
+        if (bitmap_ctx->bitmap_size > undocumented_single_allocation_apple) memory_limit_hit = 1;
+        if (bitmap_ctx->bitmap_size > undocumented_single_allocation_apple) memory_limit_hit = 1;
+        if (bitmap_ctx->bitmap_size > undocumented_single_allocation_apple) memory_limit_hit = 1;
+        if (bitmap_ctx->bitmap_size > undocumented_single_allocation_apple) memory_limit_hit = 1;
+        if (bitmap_ctx->bitmap_size > undocumented_single_allocation_apple) memory_limit_hit = 1;
+        if (bitmap_ctx->bitmap_size > undocumented_single_allocation_apple) memory_limit_hit = 1;
+        if (size_bfs                > undocumented_single_allocation_apple) memory_limit_hit = 1;
+        if (size_combs              > undocumented_single_allocation_apple) memory_limit_hit = 1;
+        if (size_digests            > undocumented_single_allocation_apple) memory_limit_hit = 1;
+        if (size_esalts             > undocumented_single_allocation_apple) memory_limit_hit = 1;
+        if (size_hooks              > undocumented_single_allocation_apple) memory_limit_hit = 1;
+        if (size_markov_css         > undocumented_single_allocation_apple) memory_limit_hit = 1;
+        if (size_plains             > undocumented_single_allocation_apple) memory_limit_hit = 1;
+        if (size_pws                > undocumented_single_allocation_apple) memory_limit_hit = 1;
+        if (size_pws_amp            > undocumented_single_allocation_apple) memory_limit_hit = 1;
+        if (size_pws_comp           > undocumented_single_allocation_apple) memory_limit_hit = 1;
+        if (size_pws_idx            > undocumented_single_allocation_apple) memory_limit_hit = 1;
+        if (size_results            > undocumented_single_allocation_apple) memory_limit_hit = 1;
+        if (size_root_css           > undocumented_single_allocation_apple) memory_limit_hit = 1;
+        if (size_rules              > undocumented_single_allocation_apple) memory_limit_hit = 1;
+        if (size_rules_c            > undocumented_single_allocation_apple) memory_limit_hit = 1;
+        if (size_salts              > undocumented_single_allocation_apple) memory_limit_hit = 1;
+        if (size_extra_buffer       > undocumented_single_allocation_apple) memory_limit_hit = 1;
+        if (size_shown              > undocumented_single_allocation_apple) memory_limit_hit = 1;
+        if (size_tm                 > undocumented_single_allocation_apple) memory_limit_hit = 1;
+        if (size_tmps               > undocumented_single_allocation_apple) memory_limit_hit = 1;
+        if (size_st_digests         > undocumented_single_allocation_apple) memory_limit_hit = 1;
+        if (size_st_salts           > undocumented_single_allocation_apple) memory_limit_hit = 1;
+        if (size_st_esalts          > undocumented_single_allocation_apple) memory_limit_hit = 1;
+      }
+
       const u64 size_total
         = bitmap_ctx->bitmap_size
         + bitmap_ctx->bitmap_size
diff --git a/src/modules/module_01500.c b/src/modules/module_01500.c
index dc7b7b47e..ca0e90232 100644
--- a/src/modules/module_01500.c
+++ b/src/modules/module_01500.c
@@ -95,13 +95,6 @@ int module_build_plain_postprocess (MAYBE_UNUSED const hashconfig_t *hashconfig,
   return src_len;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance only optimization
-
-  return kernel_threads_max;
-}
-
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   u32 kernel_loops_max = KERNEL_LOOPS_MAX;
@@ -345,7 +338,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_03000.c b/src/modules/module_03000.c
index c9b616ab5..cc091a5e5 100644
--- a/src/modules/module_03000.c
+++ b/src/modules/module_03000.c
@@ -96,13 +96,6 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
   return jit_build_options;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance only optimization
-
-  return kernel_threads_max;
-}
-
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   u32 kernel_loops_max = KERNEL_LOOPS_MAX;
@@ -269,7 +262,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_12500.c b/src/modules/module_12500.c
index fda848e0a..a095b8f78 100644
--- a/src/modules/module_12500.c
+++ b/src/modules/module_12500.c
@@ -88,15 +88,6 @@ u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_
   return kernel_loops_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  // -T 128 works slightly faster but it's free for the user to change
-
-  const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : 128;
-
-  return kernel_threads_max;
-}
-
 u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const bool optimized_kernel = (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL);
@@ -256,7 +247,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_14000.c b/src/modules/module_14000.c
index 013888bcc..abe8d259e 100644
--- a/src/modules/module_14000.c
+++ b/src/modules/module_14000.c
@@ -91,13 +91,6 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
   return jit_build_options;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = 64; // performance only optimization
-
-  return kernel_threads_max;
-}
-
 u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   u32 kernel_loops_max = KERNEL_LOOPS_MAX;
@@ -300,7 +293,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_14600.c b/src/modules/module_14600.c
index 0ff26ac67..755fef2ff 100644
--- a/src/modules/module_14600.c
+++ b/src/modules/module_14600.c
@@ -179,17 +179,6 @@ typedef struct luks_tmp
 
 } luks_tmp_t;
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  // the module requires a lot of registers for key schedulers on _comp kernel.
-  // it's possible, if using too many threads, there's not enough registers available, typically ending with misleading error message:
-  // cuLaunchKernel(): out of memory
-
-  const u32 kernel_threads_max = 64;
-
-  return kernel_threads_max;
-}
-
 void *module_benchmark_esalt (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   luks_t *luks = (luks_t *) hcmalloc (sizeof (luks_t));
@@ -655,7 +644,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = module_kern_type_dynamic;
diff --git a/src/modules/module_23700.c b/src/modules/module_23700.c
index d3803b0eb..26a24c16f 100644
--- a/src/modules/module_23700.c
+++ b/src/modules/module_23700.c
@@ -105,15 +105,6 @@ u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_
   return kernel_loops_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  // -T 128 works slightly faster but it's free for the user to change
-
-  const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : 128;
-
-  return kernel_threads_max;
-}
-
 u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const bool optimized_kernel = (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL);
@@ -377,7 +368,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_23800.c b/src/modules/module_23800.c
index f910b361f..1ca597082 100644
--- a/src/modules/module_23800.c
+++ b/src/modules/module_23800.c
@@ -388,15 +388,6 @@ u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_
   return kernel_loops_max;
 }
 
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  // -T 128 works slightly faster but it's free for the user to change
-
-  const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : 128;
-
-  return kernel_threads_max;
-}
-
 u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const bool optimized_kernel = (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL);
@@ -654,7 +645,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
   module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_24700.c b/src/modules/module_24700.c
index 04163dc37..e50df1c2f 100644
--- a/src/modules/module_24700.c
+++ b/src/modules/module_24700.c
@@ -52,9 +52,8 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
 
   token.token_cnt  = 1;
 
-  token.len_min[0] = 10;
-  token.len_max[0] = 10;
-  token.attr[0]    = TOKEN_ATTR_VERIFY_LENGTH
+  token.len[0]     = 10;
+  token.attr[0]    = TOKEN_ATTR_FIXED_LENGTH
                    | TOKEN_ATTR_VERIFY_HEX;
 
   const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token);
@@ -62,9 +61,14 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
   if (rc_tokenizer != PARSER_OK) return (rc_tokenizer);
 
   const u8 *hash_pos = token.buf[0];
+  const u32 hash_len = token.len[0];
 
-  digest[0] = hex_to_u32 (hash_pos +  0);
-  digest[1] = hex_to_u32 (hash_pos +  8);
+  u8 digest_tmp[16] = { 0 };
+
+  memcpy (digest_tmp, hash_pos, hash_len);
+
+  digest[0] = hex_to_u32 (digest_tmp +  0);
+  digest[1] = hex_to_u32 (digest_tmp +  8);
 
   if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
   {
diff --git a/src/modules/module_25500.c b/src/modules/module_25500.c
index e6853e951..f858e3062 100644
--- a/src/modules/module_25500.c
+++ b/src/modules/module_25500.c
@@ -247,9 +247,9 @@ int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
 
   // salt
 
-  #define SALT_LEN_BASE64 ((16 * 8) / 6) + 3
-  #define IV_LEN_BASE64   ((12 * 8) / 6) + 3
-  #define CT_LEN_BASE64   ((72 * 8) / 6) + 3
+  #define SALT_LEN_BASE64 ((16 * 8) / 6) + 3 + 1 // 25 vs 24
+  #define IV_LEN_BASE64   ((12 * 8) / 6) + 1     // 17 vs 16
+  #define CT_LEN_BASE64   ((72 * 8) / 6) + 1     // 97 vs 96
 
   u8 salt_buf[SALT_LEN_BASE64] = { 0 };
 
diff --git a/src/modules/module_26700.c b/src/modules/module_26700.c
new file mode 100644
index 000000000..c64f269cf
--- /dev/null
+++ b/src/modules/module_26700.c
@@ -0,0 +1,336 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#include "common.h"
+#include "types.h"
+#include "modules.h"
+#include "bitops.h"
+#include "convert.h"
+#include "shared.h"
+#include "memory.h"
+#include "emu_inc_hash_sha1.h"
+
+static const u32   ATTACK_EXEC    = ATTACK_EXEC_OUTSIDE_KERNEL;
+static const u32   DGST_POS0      = 0;
+static const u32   DGST_POS1      = 1;
+static const u32   DGST_POS2      = 2;
+static const u32   DGST_POS3      = 3;
+static const u32   DGST_SIZE      = DGST_SIZE_4_4;
+static const u32   HASH_CATEGORY  = HASH_CATEGORY_NETWORK_PROTOCOL;
+static const char *HASH_NAME      = "SNMPv3 HMAC-SHA224-128";
+static const u64   KERN_TYPE      = 26700;
+static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE;
+static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE;
+static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
+static const char *ST_PASS        = "hashcat1";
+static const char *ST_HASH        = "$SNMPv3$3$45889431$308197020103301102047aa1a79e020300ffe30401010201030440303e041180001f88808106d566db57fd600000000002011002020118040e6d61747269785f5348412d3232340410000000000000000000000000000000000400303d041180001f88808106d566db57fd60000000000400a2260204272f76620201000201003018301606082b06010201010200060a2b06010401bf0803020a$80001f88808106d566db57fd6000000000$2f7a3891dd2e27d3f567e4d6d0257962";
+
+u32         module_attack_exec    (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC;     }
+u32         module_dgst_pos0      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0;       }
+u32         module_dgst_pos1      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS1;       }
+u32         module_dgst_pos2      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS2;       }
+u32         module_dgst_pos3      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS3;       }
+u32         module_dgst_size      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_SIZE;       }
+u32         module_hash_category  (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_CATEGORY;   }
+const char *module_hash_name      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_NAME;       }
+u64         module_kern_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return KERN_TYPE;       }
+u32         module_opti_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTI_TYPE;       }
+u64         module_opts_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTS_TYPE;       }
+u32         module_salt_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return SALT_TYPE;       }
+const char *module_st_hash        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH;         }
+const char *module_st_pass        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS;         }
+
+static const char *SIGNATURE_SNMPV3 = "$SNMPv3$3$";
+
+#define SNMPV3_SALT_MAX             1500
+#define SNMPV3_ENGINEID_MAX         34
+#define SNMPV3_MSG_AUTH_PARAMS_LEN  16
+#define SNMPV3_ROUNDS               1048576
+#define SNMPV3_MAX_PW_LENGTH        64
+
+#define SNMPV3_TMP_ELEMS            4096 // 4096 = (256 (max pw length) * 64) / sizeof (u32)
+#define SNMPV3_HASH_ELEMS           8
+
+#define SNMPV3_MAX_SALT_ELEMS       512 // 512 * 4 = 2048 > 1500, also has to be multiple of 64
+#define SNMPV3_MAX_ENGINE_ELEMS     16  // 16 * 4 = 64 > 32, also has to be multiple of 64
+#define SNMPV3_MAX_PNUM_ELEMS       4   // 4 * 4 = 16 > 9
+
+typedef struct hmac_sha224_tmp
+{
+  u32 tmp[SNMPV3_TMP_ELEMS];
+  u32 h[SNMPV3_HASH_ELEMS];
+
+} hmac_sha224_tmp_t;
+
+typedef struct snmpv3
+{
+  u32 salt_buf[SNMPV3_MAX_SALT_ELEMS];
+  u32 salt_len;
+
+  u32 engineID_buf[SNMPV3_MAX_ENGINE_ELEMS];
+  u32 engineID_len;
+
+  u32 packet_number[SNMPV3_MAX_PNUM_ELEMS];
+
+} snmpv3_t;
+
+u32 module_pw_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 pw_min = 8;
+
+  return pw_min;
+}
+
+u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u64 esalt_size = (const u64) sizeof (snmpv3_t);
+
+  return esalt_size;
+}
+
+u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u64 tmp_size = (const u64) sizeof (hmac_sha224_tmp_t);
+
+  return tmp_size;
+}
+
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  // we need to fix iteration count to guarantee the loop count is a multiple of 64
+  // 2k calls to sha224_transform typically is enough to overtime pcie bottleneck
+
+  const u32 kernel_loops_min = 2048 * 64;
+
+  return kernel_loops_min;
+}
+
+u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_max = 2048 * 64;
+
+  return kernel_loops_max;
+}
+
+int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
+{
+  u32 *digest = (u32 *) digest_buf;
+
+  snmpv3_t *snmpv3 = (snmpv3_t *) esalt_buf;
+
+  token_t token;
+
+  token.token_cnt  = 5;
+  token.signatures_cnt    = 1;
+  token.signatures_buf[0] = SIGNATURE_SNMPV3;
+
+  token.len[0]     = 10;
+  token.attr[0]    = TOKEN_ATTR_FIXED_LENGTH
+                   | TOKEN_ATTR_VERIFY_SIGNATURE;
+
+  // packet number
+  token.len_min[1] = 1;
+  token.len_max[1] = 8;
+  token.sep[1]     = '$';
+  token.attr[1]    = TOKEN_ATTR_VERIFY_LENGTH
+                   | TOKEN_ATTR_VERIFY_DIGIT;
+  // salt
+  token.len_min[2] = SNMPV3_MSG_AUTH_PARAMS_LEN * 2;
+  token.len_max[2] = SNMPV3_SALT_MAX * 2;
+  token.sep[2]     = '$';
+  token.attr[2]    = TOKEN_ATTR_VERIFY_LENGTH
+                   | TOKEN_ATTR_VERIFY_HEX;
+
+  // engineid
+  token.len_min[3] = 26;
+  token.len_max[3] = SNMPV3_ENGINEID_MAX;
+  token.sep[3]     = '$';
+  token.attr[3]    = TOKEN_ATTR_VERIFY_LENGTH
+                   | TOKEN_ATTR_VERIFY_HEX;
+
+  // digest
+  token.len[4]     = SNMPV3_MSG_AUTH_PARAMS_LEN * 2;
+  token.sep[4]     = '$';
+  token.attr[4]    = TOKEN_ATTR_FIXED_LENGTH
+                   | TOKEN_ATTR_VERIFY_HEX;
+
+  const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token);
+
+  if (rc_tokenizer != PARSER_OK) return (rc_tokenizer);
+
+  // packet number
+
+  const u8 *packet_number_pos = token.buf[1];
+  const int packet_number_len = token.len[1];
+
+  memset (snmpv3->packet_number, 0, sizeof (snmpv3->packet_number));
+
+  strncpy ((char *) snmpv3->packet_number, (char *) packet_number_pos, packet_number_len);
+
+  // salt
+
+  const u8 *salt_pos = token.buf[2];
+  const int salt_len = token.len[2];
+
+  u8 *salt_ptr = (u8 *) snmpv3->salt_buf;
+
+  snmpv3->salt_len = hex_decode (salt_pos, salt_len, salt_ptr);
+
+  salt->salt_iter = SNMPV3_ROUNDS;
+
+  // handle unique salts detection
+
+  sha1_ctx_t sha1_ctx;
+
+  sha1_init   (&sha1_ctx);
+  sha1_update (&sha1_ctx, snmpv3->salt_buf, snmpv3->salt_len);
+  sha1_final  (&sha1_ctx);
+
+  // store sha1(snmpv3->salt_buf) in salt_buf
+
+  salt->salt_len = 20;
+
+  memcpy (salt->salt_buf, sha1_ctx.h, salt->salt_len);
+
+  // engineid
+
+  const u8 *engineID_pos = token.buf[3];
+  const int engineID_len = token.len[3];
+
+  u8 *engineID_ptr = (u8 *) snmpv3->engineID_buf;
+
+  snmpv3->engineID_len = hex_decode (engineID_pos, engineID_len, engineID_ptr);
+
+  // digest
+
+  const u8 *hash_pos = token.buf[4];
+
+  digest[0] = hex_to_u32 (hash_pos +  0);
+  digest[1] = hex_to_u32 (hash_pos +  8);
+  digest[2] = hex_to_u32 (hash_pos + 16);
+  digest[3] = hex_to_u32 (hash_pos + 24);
+
+  digest[0] = byte_swap_32 (digest[0]);
+  digest[1] = byte_swap_32 (digest[1]);
+  digest[2] = byte_swap_32 (digest[2]);
+  digest[3] = byte_swap_32 (digest[3]);
+
+  return (PARSER_OK);
+}
+
+int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const void *digest_buf, MAYBE_UNUSED const salt_t *salt, MAYBE_UNUSED const void *esalt_buf, MAYBE_UNUSED const void *hook_salt_buf, MAYBE_UNUSED const hashinfo_t *hash_info, char *line_buf, MAYBE_UNUSED const int line_size)
+{
+  const u32 *digest = (const u32 *) digest_buf;
+
+  snmpv3_t *snmpv3 = (snmpv3_t *) esalt_buf;
+
+  u8 *out_buf = (u8 *) line_buf;
+
+  int out_len = snprintf (line_buf, line_size, "%s%s$", SIGNATURE_SNMPV3, (char *) snmpv3->packet_number);
+
+  out_len += hex_encode ((u8 *) snmpv3->salt_buf, snmpv3->salt_len, out_buf + out_len);
+
+  out_buf[out_len] = '$';
+
+  out_len++;
+
+  out_len += hex_encode ((u8 *) snmpv3->engineID_buf, snmpv3->engineID_len, out_buf + out_len);
+
+  out_buf[out_len] = '$';
+
+  out_len++;
+
+  u32 digest_tmp[4];
+
+  digest_tmp[0] = byte_swap_32 (digest[0]);
+  digest_tmp[1] = byte_swap_32 (digest[1]);
+  digest_tmp[2] = byte_swap_32 (digest[2]);
+  digest_tmp[3] = byte_swap_32 (digest[3]);
+
+  u32_to_hex (digest_tmp[0], out_buf + out_len); out_len += 8;
+  u32_to_hex (digest_tmp[1], out_buf + out_len); out_len += 8;
+  u32_to_hex (digest_tmp[2], out_buf + out_len); out_len += 8;
+  u32_to_hex (digest_tmp[3], out_buf + out_len); out_len += 8;
+
+  out_buf[out_len] = 0;
+
+  return out_len;
+}
+
+void module_init (module_ctx_t *module_ctx)
+{
+  module_ctx->module_context_size             = MODULE_CONTEXT_SIZE_CURRENT;
+  module_ctx->module_interface_version        = MODULE_INTERFACE_VERSION_CURRENT;
+
+  module_ctx->module_attack_exec              = module_attack_exec;
+  module_ctx->module_benchmark_esalt          = MODULE_DEFAULT;
+  module_ctx->module_benchmark_hook_salt      = MODULE_DEFAULT;
+  module_ctx->module_benchmark_mask           = MODULE_DEFAULT;
+  module_ctx->module_benchmark_salt           = MODULE_DEFAULT;
+  module_ctx->module_build_plain_postprocess  = MODULE_DEFAULT;
+  module_ctx->module_deep_comp_kernel         = MODULE_DEFAULT;
+  module_ctx->module_dgst_pos0                = module_dgst_pos0;
+  module_ctx->module_dgst_pos1                = module_dgst_pos1;
+  module_ctx->module_dgst_pos2                = module_dgst_pos2;
+  module_ctx->module_dgst_pos3                = module_dgst_pos3;
+  module_ctx->module_dgst_size                = module_dgst_size;
+  module_ctx->module_dictstat_disable         = MODULE_DEFAULT;
+  module_ctx->module_esalt_size               = module_esalt_size;
+  module_ctx->module_extra_buffer_size        = MODULE_DEFAULT;
+  module_ctx->module_extra_tmp_size           = MODULE_DEFAULT;
+  module_ctx->module_forced_outfile_format    = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_count        = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_parse        = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_save         = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_potfile      = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_zero_hash    = MODULE_DEFAULT;
+  module_ctx->module_hash_decode              = module_hash_decode;
+  module_ctx->module_hash_encode_status       = MODULE_DEFAULT;
+  module_ctx->module_hash_encode_potfile      = MODULE_DEFAULT;
+  module_ctx->module_hash_encode              = module_hash_encode;
+  module_ctx->module_hash_init_selftest       = MODULE_DEFAULT;
+  module_ctx->module_hash_mode                = MODULE_DEFAULT;
+  module_ctx->module_hash_category            = module_hash_category;
+  module_ctx->module_hash_name                = module_hash_name;
+  module_ctx->module_hashes_count_min         = MODULE_DEFAULT;
+  module_ctx->module_hashes_count_max         = MODULE_DEFAULT;
+  module_ctx->module_hlfmt_disable            = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_size    = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_init    = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_term    = MODULE_DEFAULT;
+  module_ctx->module_hook12                   = MODULE_DEFAULT;
+  module_ctx->module_hook23                   = MODULE_DEFAULT;
+  module_ctx->module_hook_salt_size           = MODULE_DEFAULT;
+  module_ctx->module_hook_size                = MODULE_DEFAULT;
+  module_ctx->module_jit_build_options        = MODULE_DEFAULT;
+  module_ctx->module_jit_cache_disable        = MODULE_DEFAULT;
+  module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
+  module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
+  module_ctx->module_kern_type                = module_kern_type;
+  module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
+  module_ctx->module_opti_type                = module_opti_type;
+  module_ctx->module_opts_type                = module_opts_type;
+  module_ctx->module_outfile_check_disable    = MODULE_DEFAULT;
+  module_ctx->module_outfile_check_nocomp     = MODULE_DEFAULT;
+  module_ctx->module_potfile_custom_check     = MODULE_DEFAULT;
+  module_ctx->module_potfile_disable          = MODULE_DEFAULT;
+  module_ctx->module_potfile_keep_all_hashes  = MODULE_DEFAULT;
+  module_ctx->module_pwdump_column            = MODULE_DEFAULT;
+  module_ctx->module_pw_max                   = MODULE_DEFAULT;
+  module_ctx->module_pw_min                   = module_pw_min;
+  module_ctx->module_salt_max                 = MODULE_DEFAULT;
+  module_ctx->module_salt_min                 = MODULE_DEFAULT;
+  module_ctx->module_salt_type                = module_salt_type;
+  module_ctx->module_separator                = MODULE_DEFAULT;
+  module_ctx->module_st_hash                  = module_st_hash;
+  module_ctx->module_st_pass                  = module_st_pass;
+  module_ctx->module_tmp_size                 = module_tmp_size;
+  module_ctx->module_unstable_warning         = MODULE_DEFAULT;
+  module_ctx->module_warmup_disable           = MODULE_DEFAULT;
+}
diff --git a/src/modules/module_26800.c b/src/modules/module_26800.c
new file mode 100644
index 000000000..1eb4aac43
--- /dev/null
+++ b/src/modules/module_26800.c
@@ -0,0 +1,344 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#include "common.h"
+#include "types.h"
+#include "modules.h"
+#include "bitops.h"
+#include "convert.h"
+#include "shared.h"
+#include "memory.h"
+#include "emu_inc_hash_sha1.h"
+
+static const u32   ATTACK_EXEC    = ATTACK_EXEC_OUTSIDE_KERNEL;
+static const u32   DGST_POS0      = 0;
+static const u32   DGST_POS1      = 1;
+static const u32   DGST_POS2      = 2;
+static const u32   DGST_POS3      = 3;
+static const u32   DGST_SIZE      = DGST_SIZE_4_6;
+static const u32   HASH_CATEGORY  = HASH_CATEGORY_NETWORK_PROTOCOL;
+static const char *HASH_NAME      = "SNMPv3 HMAC-SHA256-192";
+static const u64   KERN_TYPE      = 26800;
+static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE;
+static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE;
+static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
+static const char *ST_PASS        = "hashcat1";
+static const char *ST_HASH        = "$SNMPv3$4$45889431$30819f020103301102047fc51818020300ffe304010102010304483046041180001f88808106d566db57fd600000000002011002020118040e6d61747269785f5348412d32353604180000000000000000000000000000000000000000000000000400303d041180001f88808106d566db57fd60000000000400a22602040efec2600201000201003018301606082b06010201010200060a2b06010401bf0803020a$80001f88808106d566db57fd6000000000$36d655bfeb59e933845db47d719b68ac7bc59ec087eb89a0";
+
+u32         module_attack_exec    (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC;     }
+u32         module_dgst_pos0      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0;       }
+u32         module_dgst_pos1      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS1;       }
+u32         module_dgst_pos2      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS2;       }
+u32         module_dgst_pos3      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS3;       }
+u32         module_dgst_size      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_SIZE;       }
+u32         module_hash_category  (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_CATEGORY;   }
+const char *module_hash_name      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_NAME;       }
+u64         module_kern_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return KERN_TYPE;       }
+u32         module_opti_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTI_TYPE;       }
+u64         module_opts_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTS_TYPE;       }
+u32         module_salt_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return SALT_TYPE;       }
+const char *module_st_hash        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH;         }
+const char *module_st_pass        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS;         }
+
+static const char *SIGNATURE_SNMPV3 = "$SNMPv3$4$";
+
+#define SNMPV3_SALT_MAX             1500
+#define SNMPV3_ENGINEID_MAX         34
+#define SNMPV3_MSG_AUTH_PARAMS_LEN  24
+#define SNMPV3_ROUNDS               1048576
+#define SNMPV3_MAX_PW_LENGTH        64
+
+#define SNMPV3_TMP_ELEMS            4096 // 4096 = (256 (max pw length) * 64) / sizeof (u32)
+#define SNMPV3_HASH_ELEMS           8
+
+#define SNMPV3_MAX_SALT_ELEMS       512 // 512 * 4 = 2048 > 1500, also has to be multiple of 64
+#define SNMPV3_MAX_ENGINE_ELEMS     16  // 16 * 4 = 64 > 32, also has to be multiple of 64
+#define SNMPV3_MAX_PNUM_ELEMS       4   // 4 * 4 = 16 > 9
+
+typedef struct hmac_sha256_tmp
+{
+  u32 tmp[SNMPV3_TMP_ELEMS];
+  u32 h[SNMPV3_HASH_ELEMS];
+
+} hmac_sha256_tmp_t;
+
+typedef struct snmpv3
+{
+  u32 salt_buf[SNMPV3_MAX_SALT_ELEMS];
+  u32 salt_len;
+
+  u32 engineID_buf[SNMPV3_MAX_ENGINE_ELEMS];
+  u32 engineID_len;
+
+  u32 packet_number[SNMPV3_MAX_PNUM_ELEMS];
+
+} snmpv3_t;
+
+u32 module_pw_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 pw_min = 8;
+
+  return pw_min;
+}
+
+u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u64 esalt_size = (const u64) sizeof (snmpv3_t);
+
+  return esalt_size;
+}
+
+u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u64 tmp_size = (const u64) sizeof (hmac_sha256_tmp_t);
+
+  return tmp_size;
+}
+
+u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  // we need to fix iteration count to guarantee the loop count is a multiple of 64
+  // 2k calls to sha256_transform typically is enough to overtime pcie bottleneck
+
+  const u32 kernel_loops_min = 2048 * 64;
+
+  return kernel_loops_min;
+}
+
+u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u32 kernel_loops_max = 2048 * 64;
+
+  return kernel_loops_max;
+}
+
+int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
+{
+  u32 *digest = (u32 *) digest_buf;
+
+  snmpv3_t *snmpv3 = (snmpv3_t *) esalt_buf;
+
+  token_t token;
+
+  token.token_cnt  = 5;
+  token.signatures_cnt    = 1;
+  token.signatures_buf[0] = SIGNATURE_SNMPV3;
+
+  token.len[0]     = 10;
+  token.attr[0]    = TOKEN_ATTR_FIXED_LENGTH
+                   | TOKEN_ATTR_VERIFY_SIGNATURE;
+
+  // packet number
+  token.len_min[1] = 1;
+  token.len_max[1] = 8;
+  token.sep[1]     = '$';
+  token.attr[1]    = TOKEN_ATTR_VERIFY_LENGTH
+                   | TOKEN_ATTR_VERIFY_DIGIT;
+  // salt
+  token.len_min[2] = SNMPV3_MSG_AUTH_PARAMS_LEN * 2;
+  token.len_max[2] = SNMPV3_SALT_MAX * 2;
+  token.sep[2]     = '$';
+  token.attr[2]    = TOKEN_ATTR_VERIFY_LENGTH
+                   | TOKEN_ATTR_VERIFY_HEX;
+
+  // engineid
+  token.len_min[3] = 26;
+  token.len_max[3] = SNMPV3_ENGINEID_MAX;
+  token.sep[3]     = '$';
+  token.attr[3]    = TOKEN_ATTR_VERIFY_LENGTH
+                   | TOKEN_ATTR_VERIFY_HEX;
+
+  // digest
+  token.len[4]     = SNMPV3_MSG_AUTH_PARAMS_LEN * 2;
+  token.sep[4]     = '$';
+  token.attr[4]    = TOKEN_ATTR_FIXED_LENGTH
+                   | TOKEN_ATTR_VERIFY_HEX;
+
+  const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token);
+
+  if (rc_tokenizer != PARSER_OK) return (rc_tokenizer);
+
+  // packet number
+
+  const u8 *packet_number_pos = token.buf[1];
+  const int packet_number_len = token.len[1];
+
+  memset (snmpv3->packet_number, 0, sizeof (snmpv3->packet_number));
+
+  strncpy ((char *) snmpv3->packet_number, (char *) packet_number_pos, packet_number_len);
+
+  // salt
+
+  const u8 *salt_pos = token.buf[2];
+  const int salt_len = token.len[2];
+
+  u8 *salt_ptr = (u8 *) snmpv3->salt_buf;
+
+  snmpv3->salt_len = hex_decode (salt_pos, salt_len, salt_ptr);
+
+  salt->salt_iter = SNMPV3_ROUNDS;
+
+  // handle unique salts detection
+
+  sha1_ctx_t sha1_ctx;
+
+  sha1_init   (&sha1_ctx);
+  sha1_update (&sha1_ctx, snmpv3->salt_buf, snmpv3->salt_len);
+  sha1_final  (&sha1_ctx);
+
+  // store sha1(snmpv3->salt_buf) in salt_buf
+
+  salt->salt_len = 20;
+
+  memcpy (salt->salt_buf, sha1_ctx.h, salt->salt_len);
+
+  // engineid
+
+  const u8 *engineID_pos = token.buf[3];
+  const int engineID_len = token.len[3];
+
+  u8 *engineID_ptr = (u8 *) snmpv3->engineID_buf;
+
+  snmpv3->engineID_len = hex_decode (engineID_pos, engineID_len, engineID_ptr);
+
+  // digest
+
+  const u8 *hash_pos = token.buf[4];
+
+  digest[0] = hex_to_u32 (hash_pos +  0);
+  digest[1] = hex_to_u32 (hash_pos +  8);
+  digest[2] = hex_to_u32 (hash_pos + 16);
+  digest[3] = hex_to_u32 (hash_pos + 24);
+  digest[4] = hex_to_u32 (hash_pos + 32);
+  digest[5] = hex_to_u32 (hash_pos + 40);
+
+  digest[0] = byte_swap_32 (digest[0]);
+  digest[1] = byte_swap_32 (digest[1]);
+  digest[2] = byte_swap_32 (digest[2]);
+  digest[3] = byte_swap_32 (digest[3]);
+  digest[4] = byte_swap_32 (digest[4]);
+  digest[5] = byte_swap_32 (digest[5]);
+
+  return (PARSER_OK);
+}
+
+int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const void *digest_buf, MAYBE_UNUSED const salt_t *salt, MAYBE_UNUSED const void *esalt_buf, MAYBE_UNUSED const void *hook_salt_buf, MAYBE_UNUSED const hashinfo_t *hash_info, char *line_buf, MAYBE_UNUSED const int line_size)
+{
+  const u32 *digest = (const u32 *) digest_buf;
+
+  snmpv3_t *snmpv3 = (snmpv3_t *) esalt_buf;
+
+  u8 *out_buf = (u8 *) line_buf;
+
+  int out_len = snprintf (line_buf, line_size, "%s%s$", SIGNATURE_SNMPV3, (char *) snmpv3->packet_number);
+
+  out_len += hex_encode ((u8 *) snmpv3->salt_buf, snmpv3->salt_len, out_buf + out_len);
+
+  out_buf[out_len] = '$';
+
+  out_len++;
+
+  out_len += hex_encode ((u8 *) snmpv3->engineID_buf, snmpv3->engineID_len, out_buf + out_len);
+
+  out_buf[out_len] = '$';
+
+  out_len++;
+
+  u32 digest_tmp[6];
+
+  digest_tmp[0] = byte_swap_32 (digest[0]);
+  digest_tmp[1] = byte_swap_32 (digest[1]);
+  digest_tmp[2] = byte_swap_32 (digest[2]);
+  digest_tmp[3] = byte_swap_32 (digest[3]);
+  digest_tmp[4] = byte_swap_32 (digest[4]);
+  digest_tmp[5] = byte_swap_32 (digest[5]);
+
+  u32_to_hex (digest_tmp[0], out_buf + out_len); out_len += 8;
+  u32_to_hex (digest_tmp[1], out_buf + out_len); out_len += 8;
+  u32_to_hex (digest_tmp[2], out_buf + out_len); out_len += 8;
+  u32_to_hex (digest_tmp[3], out_buf + out_len); out_len += 8;
+  u32_to_hex (digest_tmp[4], out_buf + out_len); out_len += 8;
+  u32_to_hex (digest_tmp[5], out_buf + out_len); out_len += 8;
+
+  out_buf[out_len] = 0;
+
+  return out_len;
+}
+
+void module_init (module_ctx_t *module_ctx)
+{
+  module_ctx->module_context_size             = MODULE_CONTEXT_SIZE_CURRENT;
+  module_ctx->module_interface_version        = MODULE_INTERFACE_VERSION_CURRENT;
+
+  module_ctx->module_attack_exec              = module_attack_exec;
+  module_ctx->module_benchmark_esalt          = MODULE_DEFAULT;
+  module_ctx->module_benchmark_hook_salt      = MODULE_DEFAULT;
+  module_ctx->module_benchmark_mask           = MODULE_DEFAULT;
+  module_ctx->module_benchmark_salt           = MODULE_DEFAULT;
+  module_ctx->module_build_plain_postprocess  = MODULE_DEFAULT;
+  module_ctx->module_deep_comp_kernel         = MODULE_DEFAULT;
+  module_ctx->module_dgst_pos0                = module_dgst_pos0;
+  module_ctx->module_dgst_pos1                = module_dgst_pos1;
+  module_ctx->module_dgst_pos2                = module_dgst_pos2;
+  module_ctx->module_dgst_pos3                = module_dgst_pos3;
+  module_ctx->module_dgst_size                = module_dgst_size;
+  module_ctx->module_dictstat_disable         = MODULE_DEFAULT;
+  module_ctx->module_esalt_size               = module_esalt_size;
+  module_ctx->module_extra_buffer_size        = MODULE_DEFAULT;
+  module_ctx->module_extra_tmp_size           = MODULE_DEFAULT;
+  module_ctx->module_forced_outfile_format    = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_count        = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_parse        = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_save         = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_potfile      = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_zero_hash    = MODULE_DEFAULT;
+  module_ctx->module_hash_decode              = module_hash_decode;
+  module_ctx->module_hash_encode_status       = MODULE_DEFAULT;
+  module_ctx->module_hash_encode_potfile      = MODULE_DEFAULT;
+  module_ctx->module_hash_encode              = module_hash_encode;
+  module_ctx->module_hash_init_selftest       = MODULE_DEFAULT;
+  module_ctx->module_hash_mode                = MODULE_DEFAULT;
+  module_ctx->module_hash_category            = module_hash_category;
+  module_ctx->module_hash_name                = module_hash_name;
+  module_ctx->module_hashes_count_min         = MODULE_DEFAULT;
+  module_ctx->module_hashes_count_max         = MODULE_DEFAULT;
+  module_ctx->module_hlfmt_disable            = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_size    = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_init    = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_term    = MODULE_DEFAULT;
+  module_ctx->module_hook12                   = MODULE_DEFAULT;
+  module_ctx->module_hook23                   = MODULE_DEFAULT;
+  module_ctx->module_hook_salt_size           = MODULE_DEFAULT;
+  module_ctx->module_hook_size                = MODULE_DEFAULT;
+  module_ctx->module_jit_build_options        = MODULE_DEFAULT;
+  module_ctx->module_jit_cache_disable        = MODULE_DEFAULT;
+  module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
+  module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_max         = module_kernel_loops_max;
+  module_ctx->module_kernel_loops_min         = module_kernel_loops_min;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
+  module_ctx->module_kern_type                = module_kern_type;
+  module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
+  module_ctx->module_opti_type                = module_opti_type;
+  module_ctx->module_opts_type                = module_opts_type;
+  module_ctx->module_outfile_check_disable    = MODULE_DEFAULT;
+  module_ctx->module_outfile_check_nocomp     = MODULE_DEFAULT;
+  module_ctx->module_potfile_custom_check     = MODULE_DEFAULT;
+  module_ctx->module_potfile_disable          = MODULE_DEFAULT;
+  module_ctx->module_potfile_keep_all_hashes  = MODULE_DEFAULT;
+  module_ctx->module_pwdump_column            = MODULE_DEFAULT;
+  module_ctx->module_pw_max                   = MODULE_DEFAULT;
+  module_ctx->module_pw_min                   = module_pw_min;
+  module_ctx->module_salt_max                 = MODULE_DEFAULT;
+  module_ctx->module_salt_min                 = MODULE_DEFAULT;
+  module_ctx->module_salt_type                = module_salt_type;
+  module_ctx->module_separator                = MODULE_DEFAULT;
+  module_ctx->module_st_hash                  = module_st_hash;
+  module_ctx->module_st_pass                  = module_st_pass;
+  module_ctx->module_tmp_size                 = module_tmp_size;
+  module_ctx->module_unstable_warning         = MODULE_DEFAULT;
+  module_ctx->module_warmup_disable           = MODULE_DEFAULT;
+}
diff --git a/src/rp.c b/src/rp.c
index 019b6333c..811546d37 100644
--- a/src/rp.c
+++ b/src/rp.c
@@ -71,7 +71,8 @@ static const char grp_op_chr_chr[] =
 static const char grp_op_pos_chr[] =
 {
   RULE_OP_MANGLE_INSERT,
-  RULE_OP_MANGLE_OVERSTRIKE
+  RULE_OP_MANGLE_OVERSTRIKE,
+  RULE_OP_MANGLE_TOGGLE_AT_SEP
 };
 
 static const char grp_op_pos_pos0[] =
@@ -444,12 +445,18 @@ int cpu_rule_to_kernel_rule (char *rule_buf, u32 rule_len, kernel_rule_t *rule)
         break;
 
       case RULE_OP_MANGLE_TITLE:
-        SET_NAME (rule, rule_buf[rule_pos]);
+        SET_NAME    (rule, rule_buf[rule_pos]);
         break;
 
       case RULE_OP_MANGLE_TITLE_SEP:
-        SET_NAME (rule, rule_buf[rule_pos]);
-        SET_P0   (rule, rule_buf[rule_pos]);
+        SET_NAME    (rule, rule_buf[rule_pos]);
+        SET_P0      (rule, rule_buf[rule_pos]);
+        break;
+
+      case RULE_OP_MANGLE_TOGGLE_AT_SEP:
+        SET_NAME    (rule, rule_buf[rule_pos]);
+        SET_P0_CONV (rule, rule_buf[rule_pos]);
+        SET_P1      (rule, rule_buf[rule_pos]);
         break;
 
       default:
@@ -675,6 +682,12 @@ int kernel_rule_to_cpu_rule (char *rule_buf, kernel_rule_t *rule)
         GET_P0 (rule);
         break;
 
+      case RULE_OP_MANGLE_TOGGLE_AT_SEP:
+        rule_buf[rule_pos] = rule_cmd;
+        GET_P0_CONV (rule);
+        GET_P1      (rule);
+        break;
+
       case 0:
         if (rule_pos == 0) return -1;
         return rule_pos - 1;
diff --git a/src/rp_cpu.c b/src/rp_cpu.c
index b9dc23e69..0fd6265af 100644
--- a/src/rp_cpu.c
+++ b/src/rp_cpu.c
@@ -45,6 +45,41 @@ static void MANGLE_SWITCH (char *arr, const int l, const int r)
   arr[l] = c;
 }
 
+static int mangle_toggle_at_sep (char arr[RP_PASSWORD_SIZE], int arr_len, char c, int upos)
+{
+  int toggle_next = 0;
+
+  int occurrence = 0;
+
+  int pos;
+
+  for (pos = 0; pos < arr_len; pos++)
+  {
+    if (arr[pos] == c)
+    {
+      if (occurrence == upos)
+      {
+        toggle_next = 1;
+      }
+      else
+      {
+        occurrence++;
+      }
+
+      continue;
+    }
+
+    if (toggle_next == 1)
+    {
+      MANGLE_TOGGLE_AT (arr, pos);
+
+      break;
+    }
+  }
+
+  return (arr_len);
+}
+
 static int mangle_lrest (char arr[RP_PASSWORD_SIZE], int arr_len)
 {
   int pos;
@@ -561,6 +596,13 @@ int _old_apply_rule (const char *rule, int rule_len, char in[RP_PASSWORD_SIZE],
         if (upos < out_len) MANGLE_TOGGLE_AT (out, upos);
         break;
 
+      case RULE_OP_MANGLE_TOGGLE_AT_SEP:
+        NEXT_RULEPOS (rule_pos);
+        NEXT_RPTOI (rule_new, rule_pos, upos);
+        NEXT_RULEPOS (rule_pos);
+        out_len = mangle_toggle_at_sep (out, out_len, rule_new[rule_pos], upos);
+        break;
+
       case RULE_OP_MANGLE_REVERSE:
         out_len = mangle_reverse (out, out_len);
         break;
diff --git a/src/selftest.c b/src/selftest.c
index e9367e152..71b4e68b2 100644
--- a/src/selftest.c
+++ b/src/selftest.c
@@ -441,6 +441,10 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
   // main : run the kernel
 
+  const u32 kernel_threads_sav = device_param->kernel_threads;
+
+  device_param->kernel_threads = device_param->kernel_threads_min;
+
   const double spin_damp_sav = device_param->spin_damp;
 
   device_param->spin_damp = 0;
@@ -675,6 +679,8 @@ static int selftest (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
 
   device_param->spin_damp = spin_damp_sav;
 
+  device_param->kernel_threads = kernel_threads_sav;
+
   // check : check if cracked
 
   u32 num_cracked = 0;
diff --git a/src/shared.c b/src/shared.c
index 7efdd4d53..9bff0c646 100644
--- a/src/shared.c
+++ b/src/shared.c
@@ -1181,7 +1181,7 @@ int input_tokenizer (const u8 *input_buf, const int input_len, token_t *token)
 
       for (int signature_idx = 0; signature_idx < token->signatures_cnt; signature_idx++)
       {
-        if (memcmp (token->buf[token_idx], token->signatures_buf[signature_idx], token->len[token_idx]) == 0) matched = true;
+        if (strncmp ((char *) token->buf[token_idx], token->signatures_buf[signature_idx], token->len[token_idx]) == 0) matched = true;
       }
 
       if (matched == false) return (PARSER_SIGNATURE_UNMATCHED);
diff --git a/tools/benchmark_deep.pl b/tools/benchmark_deep.pl
index ba2db8856..a481a87e9 100755
--- a/tools/benchmark_deep.pl
+++ b/tools/benchmark_deep.pl
@@ -409,7 +409,6 @@ for my $hash_type (@hash_types)
     "--wordlist-autohex-disable",
     "--potfile-disable",
     "--logfile-disable",
-    "--hwmon-disable",
     "--status",
     "--status-timer", 1,
     "--runtime", $runtime,
diff --git a/tools/test_modules/m26700.pm b/tools/test_modules/m26700.pm
new file mode 100644
index 000000000..a97bc246e
--- /dev/null
+++ b/tools/test_modules/m26700.pm
@@ -0,0 +1,79 @@
+#!/usr/bin/env perl
+
+##
+## Author......: See docs/credits.txt
+## License.....: MIT
+##
+
+use strict;
+use warnings;
+
+use Digest::SHA qw (sha224 sha224_hex);
+use Digest::HMAC qw (hmac hmac_hex);
+
+sub module_constraints { [[8, 256], [32, 3000], [-1, -1], [-1, -1], [-1, -1]] }
+
+sub module_generate_hash
+{
+  my $word = shift;
+  my $salt = shift;
+  my $pkt_num = shift // int(rand(100000000));
+  my $engineID = shift // random_hex_string(26, 34);
+
+  # make even if needed
+
+  if (length($salt) %2 == 1)
+  {
+    $salt = $salt . "8";
+  }
+
+  my $string1 = $word x 1048576;
+
+  $string1 = substr ($string1, 0, 1048576);
+
+  my $sha224_digest1 = sha224_hex ($string1);
+
+  my $buf = join '', $sha224_digest1, $engineID, $sha224_digest1;
+
+  my $sha224_digest2 = sha224(pack("H*", $buf));
+
+  my $digest = hmac_hex (pack("H*", $salt), $sha224_digest2, \&sha224);
+
+  $digest = substr ($digest, 0, 32);
+
+  my $hash = sprintf ("\$SNMPv3\$3\$%s\$%s\$%s\$%s", $pkt_num, $salt, $engineID, $digest);
+
+  return $hash;
+}
+
+sub module_verify_hash
+{
+  my $line = shift;
+
+  my $idx = index ($line, ':');
+
+  return unless $idx >= 0;
+
+  my $hash = substr ($line, 0, $idx);
+  my $word = substr ($line, $idx + 1);
+
+  return unless length ($word) gt 0;
+  return unless substr ($hash, 0, 10) eq '$SNMPv3$3$';
+
+  my (undef, $signature, $version, $pkt_num, $salt, $engineID, $digest) = split '\$', $hash;
+
+  return unless defined $signature;
+  return unless defined $version;
+  return unless defined $pkt_num;
+  return unless defined $salt;
+  return unless defined $engineID;
+  return unless defined $digest;
+
+  my $word_packed = pack_if_HEX_notation ($word);
+
+  my $new_hash = module_generate_hash ($word_packed, $salt, $pkt_num, $engineID);
+
+  return ($new_hash, $word);
+}
+
+1;
diff --git a/tools/test_modules/m26800.pm b/tools/test_modules/m26800.pm
new file mode 100644
index 000000000..3039afb6b
--- /dev/null
+++ b/tools/test_modules/m26800.pm
@@ -0,0 +1,79 @@
+#!/usr/bin/env perl
+
+##
+## Author......: See docs/credits.txt
+## License.....: MIT
+##
+
+use strict;
+use warnings;
+
+use Digest::SHA qw (sha256 sha256_hex);
+use Digest::HMAC qw (hmac hmac_hex);
+
+sub module_constraints { [[8, 256], [48, 3000], [-1, -1], [-1, -1], [-1, -1]] }
+
+sub module_generate_hash
+{
+  my $word = shift;
+  my $salt = shift;
+  my $pkt_num = shift // int(rand(100000000));
+  my $engineID = shift // random_hex_string(26, 34);
+
+  # make even if needed
+
+  if (length($salt) %2 == 1)
+  {
+    $salt = $salt . "8";
+  }
+
+  my $string1 = $word x 1048576;
+
+  $string1 = substr ($string1, 0, 1048576);
+
+  my $sha256_digest1 = sha256_hex ($string1);
+
+  my $buf = join '', $sha256_digest1, $engineID, $sha256_digest1;
+
+  my $sha256_digest2 = sha256(pack("H*", $buf));
+
+  my $digest = hmac_hex (pack("H*", $salt), $sha256_digest2, \&sha256);
+
+  $digest = substr ($digest, 0, 48);
+
+  my $hash = sprintf ("\$SNMPv3\$4\$%s\$%s\$%s\$%s", $pkt_num, $salt, $engineID, $digest);
+
+  return $hash;
+}
+
+sub module_verify_hash
+{
+  my $line = shift;
+
+  my $idx = index ($line, ':');
+
+  return unless $idx >= 0;
+
+  my $hash = substr ($line, 0, $idx);
+  my $word = substr ($line, $idx + 1);
+
+  return unless length ($word) gt 0;
+  return unless substr ($hash, 0, 10) eq '$SNMPv3$4$';
+
+  my (undef, $signature, $version, $pkt_num, $salt, $engineID, $digest) = split '\$', $hash;
+
+  return unless defined $signature;
+  return unless defined $version;
+  return unless defined $pkt_num;
+  return unless defined $salt;
+  return unless defined $engineID;
+  return unless defined $digest;
+
+  my $word_packed = pack_if_HEX_notation ($word);
+
+  my $new_hash = module_generate_hash ($word_packed, $salt, $pkt_num, $engineID);
+
+  return ($new_hash, $word);
+}
+
+1;