diff --git a/OpenCL/inc_hash_blake2s.cl b/OpenCL/inc_hash_blake2s.cl
new file mode 100644
index 000000000..c10006068
--- /dev/null
+++ b/OpenCL/inc_hash_blake2s.cl
@@ -0,0 +1,702 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#include "inc_vendor.h"
+#include "inc_types.h"
+#include "inc_platform.h"
+#include "inc_common.h"
+#include "inc_hash_blake2s.h"
+
+DECLSPEC u32 blake2s_rot16_S (const u32 a)
+{
+  // exchange the v16.a / v16.b members of the union view of the input word
+  vconv32_t src;
+  vconv32_t dst;
+
+  src.v32 = a;
+
+  dst.v16.b = src.v16.a;
+  dst.v16.a = src.v16.b;
+
+  return dst.v32;
+}
+
+DECLSPEC u32x blake2s_rot16 (const u32x a)
+{
+  u32x res;
+  // run the scalar helper on each component that exists for the configured VECT_SIZE
+  #if VECT_SIZE == 1
+  res = blake2s_rot16_S (a);
+  #endif
+
+  #if VECT_SIZE >= 2
+  res.s0 = blake2s_rot16_S (a.s0);
+  res.s1 = blake2s_rot16_S (a.s1);
+  #endif
+
+  #if VECT_SIZE >= 4
+  res.s2 = blake2s_rot16_S (a.s2);
+  res.s3 = blake2s_rot16_S (a.s3);
+  #endif
+
+  #if VECT_SIZE >= 8
+  res.s4 = blake2s_rot16_S (a.s4);
+  res.s5 = blake2s_rot16_S (a.s5);
+  res.s6 = blake2s_rot16_S (a.s6);
+  res.s7 = blake2s_rot16_S (a.s7);
+  #endif
+
+  #if VECT_SIZE >= 16
+  res.s8 = blake2s_rot16_S (a.s8);
+  res.s9 = blake2s_rot16_S (a.s9);
+  res.sa = blake2s_rot16_S (a.sa);
+  res.sb = blake2s_rot16_S (a.sb);
+  res.sc = blake2s_rot16_S (a.sc);
+  res.sd = blake2s_rot16_S (a.sd);
+  res.se = blake2s_rot16_S (a.se);
+  res.sf = blake2s_rot16_S (a.sf);
+  #endif
+
+  return res;
+}
+
+DECLSPEC u32 blake2s_rot08_S (const u32 a)
+{
+  // Three build-time variants of the same operation: two byte-permute forms
+  // that differ only in the selector constant, and a generic rotate-right
+  // by 8 bits for every other target.
+  // NOTE(review): the permute selectors are presumably chosen to match the
+  // rotate-right fallback on each vendor - confirm against hc_byte_perm_S.
+
+  u32 rotated;
+
+  #if defined IS_NV
+
+  // the input word is passed as both permute sources
+
+  rotated = hc_byte_perm_S (a, a, 0x0321);
+
+  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1
+
+  // same call shape as above, different selector constant
+
+  rotated = hc_byte_perm_S (a, a, 0x00030201);
+
+  #else
+
+  // no byte-permute path selected: use the rotate helper
+
+  rotated = hc_rotr32_S (a, 8);
+
+  #endif
+
+  return rotated;
+}
+
+DECLSPEC u32x blake2s_rot08 (const u32x a)
+{
+  u32x res;
+  // run the scalar helper on each component that exists for the configured VECT_SIZE
+  #if VECT_SIZE == 1
+  res = blake2s_rot08_S (a);
+  #endif
+
+  #if VECT_SIZE >= 2
+  res.s0 = blake2s_rot08_S (a.s0);
+  res.s1 = blake2s_rot08_S (a.s1);
+  #endif
+
+  #if VECT_SIZE >= 4
+  res.s2 = blake2s_rot08_S (a.s2);
+  res.s3 = blake2s_rot08_S (a.s3);
+  #endif
+
+  #if VECT_SIZE >= 8
+  res.s4 = blake2s_rot08_S (a.s4);
+  res.s5 = blake2s_rot08_S (a.s5);
+  res.s6 = blake2s_rot08_S (a.s6);
+  res.s7 = blake2s_rot08_S (a.s7);
+  #endif
+
+  #if VECT_SIZE >= 16
+  res.s8 = blake2s_rot08_S (a.s8);
+  res.s9 = blake2s_rot08_S (a.s9);
+  res.sa = blake2s_rot08_S (a.sa);
+  res.sb = blake2s_rot08_S (a.sb);
+  res.sc = blake2s_rot08_S (a.sc);
+  res.sd = blake2s_rot08_S (a.sd);
+  res.se = blake2s_rot08_S (a.se);
+  res.sf = blake2s_rot08_S (a.sf);
+  #endif
+
+  return res;
+}
+
+DECLSPEC void blake2s_transform (PRIVATE_AS u32 *h, PRIVATE_AS const u32 *m, const int len, const u32 f0)
+{
+  const u32 t0 = len; // counter word, xored into v[12] below
+  // One compression step: v = h followed by the IV (with t0 and f0 mixed in), ten rounds over m, then v is folded back into h.
+  u32 v[16];
+
+  v[ 0] = h[0];
+  v[ 1] = h[1];
+  v[ 2] = h[2];
+  v[ 3] = h[3];
+  v[ 4] = h[4];
+  v[ 5] = h[5];
+  v[ 6] = h[6];
+  v[ 7] = h[7];
+  v[ 8] = BLAKE2S_IV_00;
+  v[ 9] = BLAKE2S_IV_01;
+  v[10] = BLAKE2S_IV_02;
+  v[11] = BLAKE2S_IV_03;
+  v[12] = BLAKE2S_IV_04 ^ t0;
+  v[13] = BLAKE2S_IV_05; // ^ t1; (no second counter word is mixed in here)
+  v[14] = BLAKE2S_IV_06 ^ f0;
+  v[15] = BLAKE2S_IV_07; // ^ f1; (no second flag word is mixed in here)
+  // the 16 arguments of each round are the m[] indices consumed by its eight G steps
+  BLAKE2S_ROUND ( 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15);
+  BLAKE2S_ROUND (14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3);
+  BLAKE2S_ROUND (11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4);
+  BLAKE2S_ROUND ( 7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8);
+  BLAKE2S_ROUND ( 9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13);
+  BLAKE2S_ROUND ( 2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9);
+  BLAKE2S_ROUND (12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11);
+  BLAKE2S_ROUND (13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10);
+  BLAKE2S_ROUND ( 6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5);
+  BLAKE2S_ROUND (10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13 , 0);
+
+  h[0] = h[0] ^ v[0] ^ v[ 8]; // fold both halves of v into the chaining value
+  h[1] = h[1] ^ v[1] ^ v[ 9];
+  h[2] = h[2] ^ v[2] ^ v[10];
+  h[3] = h[3] ^ v[3] ^ v[11];
+  h[4] = h[4] ^ v[4] ^ v[12];
+  h[5] = h[5] ^ v[5] ^ v[13];
+  h[6] = h[6] ^ v[6] ^ v[14];
+  h[7] = h[7] ^ v[7] ^ v[15];
+}
+
+DECLSPEC void blake2s_init (PRIVATE_AS blake2s_ctx_t *ctx)
+{
+  ctx->len = 0; // nothing absorbed yet
+
+  ctx->m[ 0] = 0; // start with an all-zero block buffer
+  ctx->m[ 1] = 0;
+  ctx->m[ 2] = 0;
+  ctx->m[ 3] = 0;
+  ctx->m[ 4] = 0;
+  ctx->m[ 5] = 0;
+  ctx->m[ 6] = 0;
+  ctx->m[ 7] = 0;
+  ctx->m[ 8] = 0;
+  ctx->m[ 9] = 0;
+  ctx->m[10] = 0;
+  ctx->m[11] = 0;
+  ctx->m[12] = 0;
+  ctx->m[13] = 0;
+  ctx->m[14] = 0;
+  ctx->m[15] = 0;
+
+  ctx->h[0] = BLAKE2S_IV_00 ^ 0x01010020; // low byte 0x20: default output length of 32 bytes
+  ctx->h[1] = BLAKE2S_IV_01;              // h[1..7] start as the unmodified IV words
+  ctx->h[2] = BLAKE2S_IV_02;
+  ctx->h[3] = BLAKE2S_IV_03;
+  ctx->h[4] = BLAKE2S_IV_04;
+  ctx->h[5] = BLAKE2S_IV_05;
+  ctx->h[6] = BLAKE2S_IV_06;
+  ctx->h[7] = BLAKE2S_IV_07;
+}
+
+DECLSPEC void blake2s_update_64 (PRIVATE_AS blake2s_ctx_t *ctx, PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const int len)
+{
+  if (len == 0) return; // nothing to absorb
+  // Absorb len bytes held in w0..w3 (callers in this file pass at most 64); a completed block stays buffered in ctx->m until more data or blake2s_final arrives.
+  const int pos = ctx->len & 63; // byte offset of the next free position inside ctx->m
+
+  if (pos == 0)
+  {
+    if (ctx->len > 0) // if new block (pos == 0) AND the (old) len is not zero => transform
+    {
+      blake2s_transform (ctx->h, ctx->m, ctx->len, BLAKE2S_UPDATE);
+    }
+
+    ctx->m[ 0] = w0[0];
+    ctx->m[ 1] = w0[1];
+    ctx->m[ 2] = w0[2];
+    ctx->m[ 3] = w0[3];
+    ctx->m[ 4] = w1[0];
+    ctx->m[ 5] = w1[1];
+    ctx->m[ 6] = w1[2];
+    ctx->m[ 7] = w1[3];
+    ctx->m[ 8] = w2[0];
+    ctx->m[ 9] = w2[1];
+    ctx->m[10] = w2[2];
+    ctx->m[11] = w2[3];
+    ctx->m[12] = w3[0];
+    ctx->m[13] = w3[1];
+    ctx->m[14] = w3[2];
+    ctx->m[15] = w3[3];
+  }
+  else
+  {
+    if ((pos + len) <= 64)
+    {
+      switch_buffer_by_offset_le_S (w0, w1, w2, w3, pos);
+      // NOTE(review): assumes the helper moved the new bytes to offset pos and left zeros elsewhere, so OR-ing merges them - confirm
+      ctx->m[ 0] |= w0[0];
+      ctx->m[ 1] |= w0[1];
+      ctx->m[ 2] |= w0[2];
+      ctx->m[ 3] |= w0[3];
+      ctx->m[ 4] |= w1[0];
+      ctx->m[ 5] |= w1[1];
+      ctx->m[ 6] |= w1[2];
+      ctx->m[ 7] |= w1[3];
+      ctx->m[ 8] |= w2[0];
+      ctx->m[ 9] |= w2[1];
+      ctx->m[10] |= w2[2];
+      ctx->m[11] |= w2[3];
+      ctx->m[12] |= w3[0];
+      ctx->m[13] |= w3[1];
+      ctx->m[14] |= w3[2];
+      ctx->m[15] |= w3[3];
+    }
+    else
+    {
+      u32 c0[4] = { 0 };
+      u32 c1[4] = { 0 };
+      u32 c2[4] = { 0 };
+      u32 c3[4] = { 0 };
+      // c0..c3 are handed to the carry helper and become the next buffered block further down
+      switch_buffer_by_offset_carry_le_S (w0, w1, w2, w3, c0, c1, c2, c3, pos);
+
+      ctx->m[ 0] |= w0[0];
+      ctx->m[ 1] |= w0[1];
+      ctx->m[ 2] |= w0[2];
+      ctx->m[ 3] |= w0[3];
+      ctx->m[ 4] |= w1[0];
+      ctx->m[ 5] |= w1[1];
+      ctx->m[ 6] |= w1[2];
+      ctx->m[ 7] |= w1[3];
+      ctx->m[ 8] |= w2[0];
+      ctx->m[ 9] |= w2[1];
+      ctx->m[10] |= w2[2];
+      ctx->m[11] |= w2[3];
+      ctx->m[12] |= w3[0];
+      ctx->m[13] |= w3[1];
+      ctx->m[14] |= w3[2];
+      ctx->m[15] |= w3[3];
+
+      // len must be a multiple of 64 (not ctx->len) for BLAKE2S_UPDATE:
+
+      const u32 cur_len = ((ctx->len + len) / 64) * 64;
+
+      blake2s_transform (ctx->h, ctx->m, cur_len, BLAKE2S_UPDATE);
+
+      ctx->m[ 0] = c0[0];
+      ctx->m[ 1] = c0[1];
+      ctx->m[ 2] = c0[2];
+      ctx->m[ 3] = c0[3];
+      ctx->m[ 4] = c1[0];
+      ctx->m[ 5] = c1[1];
+      ctx->m[ 6] = c1[2];
+      ctx->m[ 7] = c1[3];
+      ctx->m[ 8] = c2[0];
+      ctx->m[ 9] = c2[1];
+      ctx->m[10] = c2[2];
+      ctx->m[11] = c2[3];
+      ctx->m[12] = c3[0];
+      ctx->m[13] = c3[1];
+      ctx->m[14] = c3[2];
+      ctx->m[15] = c3[3];
+    }
+  }
+
+  ctx->len += len; // running total of absorbed bytes, used as the counter by later transforms
+}
+
+DECLSPEC void blake2s_update (PRIVATE_AS blake2s_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len)
+{
+  u32 w0[4];
+  u32 w1[4];
+  u32 w2[4];
+  u32 w3[4];
+
+  const int full_end = len - 64; // signed on purpose: negative when len is below one block
+
+  int off_bytes; // bytes handed to blake2s_update_64 so far
+  int off_words; // matching index into w[]
+
+  for (off_bytes = 0, off_words = 0; off_bytes < full_end; off_bytes += 64, off_words += 16)
+  {
+    w0[0] = w[off_words +  0];
+    w0[1] = w[off_words +  1];
+    w0[2] = w[off_words +  2];
+    w0[3] = w[off_words +  3];
+    w1[0] = w[off_words +  4];
+    w1[1] = w[off_words +  5];
+    w1[2] = w[off_words +  6];
+    w1[3] = w[off_words +  7];
+    w2[0] = w[off_words +  8];
+    w2[1] = w[off_words +  9];
+    w2[2] = w[off_words + 10];
+    w2[3] = w[off_words + 11];
+    w3[0] = w[off_words + 12];
+    w3[1] = w[off_words + 13];
+    w3[2] = w[off_words + 14];
+    w3[3] = w[off_words + 15];
+    // every block before the last one is passed on as a full 64 bytes
+    blake2s_update_64 (ctx, w0, w1, w2, w3, 64);
+  }
+  // the tail always loads 16 words: assumes w is readable and zero-padded up to that point - TODO confirm for all callers
+  w0[0] = w[off_words +  0];
+  w0[1] = w[off_words +  1];
+  w0[2] = w[off_words +  2];
+  w0[3] = w[off_words +  3];
+  w1[0] = w[off_words +  4];
+  w1[1] = w[off_words +  5];
+  w1[2] = w[off_words +  6];
+  w1[3] = w[off_words +  7];
+  w2[0] = w[off_words +  8];
+  w2[1] = w[off_words +  9];
+  w2[2] = w[off_words + 10];
+  w2[3] = w[off_words + 11];
+  w3[0] = w[off_words + 12];
+  w3[1] = w[off_words + 13];
+  w3[2] = w[off_words + 14];
+  w3[3] = w[off_words + 15];
+
+  blake2s_update_64 (ctx, w0, w1, w2, w3, len - (u32) off_bytes); // whatever is left after the full blocks
+}
+
+DECLSPEC void blake2s_update_global (PRIVATE_AS blake2s_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len)
+{
+  u32 w0[4];
+  u32 w1[4];
+  u32 w2[4];
+  u32 w3[4];
+
+  const int full_end = len - 64; // signed on purpose: negative when len is below one block
+
+  int off_bytes; // bytes handed to blake2s_update_64 so far
+  int off_words; // matching index into w[]
+
+  for (off_bytes = 0, off_words = 0; off_bytes < full_end; off_bytes += 64, off_words += 16)
+  {
+    w0[0] = w[off_words +  0];
+    w0[1] = w[off_words +  1];
+    w0[2] = w[off_words +  2];
+    w0[3] = w[off_words +  3];
+    w1[0] = w[off_words +  4];
+    w1[1] = w[off_words +  5];
+    w1[2] = w[off_words +  6];
+    w1[3] = w[off_words +  7];
+    w2[0] = w[off_words +  8];
+    w2[1] = w[off_words +  9];
+    w2[2] = w[off_words + 10];
+    w2[3] = w[off_words + 11];
+    w3[0] = w[off_words + 12];
+    w3[1] = w[off_words + 13];
+    w3[2] = w[off_words + 14];
+    w3[3] = w[off_words + 15];
+    // every block before the last one is passed on as a full 64 bytes
+    blake2s_update_64 (ctx, w0, w1, w2, w3, 64);
+  }
+  // the tail always loads 16 words: assumes w is readable and zero-padded up to that point - TODO confirm for all callers
+  w0[0] = w[off_words +  0];
+  w0[1] = w[off_words +  1];
+  w0[2] = w[off_words +  2];
+  w0[3] = w[off_words +  3];
+  w1[0] = w[off_words +  4];
+  w1[1] = w[off_words +  5];
+  w1[2] = w[off_words +  6];
+  w1[3] = w[off_words +  7];
+  w2[0] = w[off_words +  8];
+  w2[1] = w[off_words +  9];
+  w2[2] = w[off_words + 10];
+  w2[3] = w[off_words + 11];
+  w3[0] = w[off_words + 12];
+  w3[1] = w[off_words + 13];
+  w3[2] = w[off_words + 14];
+  w3[3] = w[off_words + 15];
+
+  blake2s_update_64 (ctx, w0, w1, w2, w3, len - (u32) off_bytes); // whatever is left after the full blocks
+}
+
+DECLSPEC void blake2s_final (PRIVATE_AS blake2s_ctx_t *ctx)
+{
+  blake2s_transform (ctx->h, ctx->m, ctx->len, BLAKE2S_FINAL); // compress the buffered block with the final flag and the total byte count; the result is left in ctx->h
+}
+
+DECLSPEC void blake2s_transform_vector (PRIVATE_AS u32x *h, PRIVATE_AS const u32x *m, const u32x len, const u32 f0)
+{
+  const u32x t0 = len; // per-component counter word, xored into v[12] below
+  // One compression step on u32x values: v = h followed by the IV (with t0 and f0 mixed in), ten rounds over m, then v is folded back into h.
+  u32x v[16];
+
+  v[ 0] = h[0];
+  v[ 1] = h[1];
+  v[ 2] = h[2];
+  v[ 3] = h[3];
+  v[ 4] = h[4];
+  v[ 5] = h[5];
+  v[ 6] = h[6];
+  v[ 7] = h[7];
+  v[ 8] = BLAKE2S_IV_00;
+  v[ 9] = BLAKE2S_IV_01;
+  v[10] = BLAKE2S_IV_02;
+  v[11] = BLAKE2S_IV_03;
+  v[12] = BLAKE2S_IV_04 ^ t0;
+  v[13] = BLAKE2S_IV_05; // ^ t1; (no second counter word is mixed in here)
+  v[14] = BLAKE2S_IV_06 ^ f0;
+  v[15] = BLAKE2S_IV_07; // ^ f1; (no second flag word is mixed in here)
+  // the 16 arguments of each round are the m[] indices consumed by its eight G steps
+  BLAKE2S_ROUND_VECTOR ( 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15);
+  BLAKE2S_ROUND_VECTOR (14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3);
+  BLAKE2S_ROUND_VECTOR (11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4);
+  BLAKE2S_ROUND_VECTOR ( 7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8);
+  BLAKE2S_ROUND_VECTOR ( 9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13);
+  BLAKE2S_ROUND_VECTOR ( 2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9);
+  BLAKE2S_ROUND_VECTOR (12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11);
+  BLAKE2S_ROUND_VECTOR (13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10);
+  BLAKE2S_ROUND_VECTOR ( 6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5);
+  BLAKE2S_ROUND_VECTOR (10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13 , 0);
+
+  h[0] = h[0] ^ v[0] ^ v[ 8]; // fold both halves of v into the chaining value
+  h[1] = h[1] ^ v[1] ^ v[ 9];
+  h[2] = h[2] ^ v[2] ^ v[10];
+  h[3] = h[3] ^ v[3] ^ v[11];
+  h[4] = h[4] ^ v[4] ^ v[12];
+  h[5] = h[5] ^ v[5] ^ v[13];
+  h[6] = h[6] ^ v[6] ^ v[14];
+  h[7] = h[7] ^ v[7] ^ v[15];
+}
+
+DECLSPEC void blake2s_init_vector (PRIVATE_AS blake2s_ctx_vector_t *ctx)
+{
+  ctx->len = 0; // nothing absorbed yet
+
+  ctx->m[ 0] = 0; // start with an all-zero block buffer
+  ctx->m[ 1] = 0;
+  ctx->m[ 2] = 0;
+  ctx->m[ 3] = 0;
+  ctx->m[ 4] = 0;
+  ctx->m[ 5] = 0;
+  ctx->m[ 6] = 0;
+  ctx->m[ 7] = 0;
+  ctx->m[ 8] = 0;
+  ctx->m[ 9] = 0;
+  ctx->m[10] = 0;
+  ctx->m[11] = 0;
+  ctx->m[12] = 0;
+  ctx->m[13] = 0;
+  ctx->m[14] = 0;
+  ctx->m[15] = 0;
+
+  ctx->h[0] = BLAKE2S_IV_00 ^ 0x01010020; // low byte 0x20: default output length of 32 bytes
+  ctx->h[1] = BLAKE2S_IV_01;              // h[1..7] start as the unmodified IV words
+  ctx->h[2] = BLAKE2S_IV_02;
+  ctx->h[3] = BLAKE2S_IV_03;
+  ctx->h[4] = BLAKE2S_IV_04;
+  ctx->h[5] = BLAKE2S_IV_05;
+  ctx->h[6] = BLAKE2S_IV_06;
+  ctx->h[7] = BLAKE2S_IV_07;
+}
+
+DECLSPEC void blake2s_init_vector_from_scalar (PRIVATE_AS blake2s_ctx_vector_t *ctx, PRIVATE_AS blake2s_ctx_t *ctx0)
+{
+  ctx->len = ctx0->len; // byte count carries over unchanged
+
+  ctx->m[ 0] = ctx0->m[ 0]; // buffered block: each scalar word is assigned to the u32x slot
+  ctx->m[ 1] = ctx0->m[ 1];
+  ctx->m[ 2] = ctx0->m[ 2];
+  ctx->m[ 3] = ctx0->m[ 3];
+  ctx->m[ 4] = ctx0->m[ 4];
+  ctx->m[ 5] = ctx0->m[ 5];
+  ctx->m[ 6] = ctx0->m[ 6];
+  ctx->m[ 7] = ctx0->m[ 7];
+  ctx->m[ 8] = ctx0->m[ 8];
+  ctx->m[ 9] = ctx0->m[ 9];
+  ctx->m[10] = ctx0->m[10];
+  ctx->m[11] = ctx0->m[11];
+  ctx->m[12] = ctx0->m[12];
+  ctx->m[13] = ctx0->m[13];
+  ctx->m[14] = ctx0->m[14];
+  ctx->m[15] = ctx0->m[15];
+
+  ctx->h[0] = ctx0->h[0]; // chaining value reached by the scalar context so far
+  ctx->h[1] = ctx0->h[1];
+  ctx->h[2] = ctx0->h[2];
+  ctx->h[3] = ctx0->h[3];
+  ctx->h[4] = ctx0->h[4];
+  ctx->h[5] = ctx0->h[5];
+  ctx->h[6] = ctx0->h[6];
+  ctx->h[7] = ctx0->h[7];
+}
+
+DECLSPEC void blake2s_update_vector_64 (PRIVATE_AS blake2s_ctx_vector_t *ctx, PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, const int len)
+{
+  if (len == 0) return; // nothing to absorb
+  // Absorb len bytes held in w0..w3 (callers in this file pass at most 64); a completed block stays buffered in ctx->m until more data or blake2s_final_vector arrives.
+  const int pos = ctx->len & 63; // byte offset of the next free position inside ctx->m
+
+  if (pos == 0)
+  {
+    if (ctx->len > 0) // if new block (pos == 0) AND the (old) len is not zero => transform
+    {
+      blake2s_transform_vector (ctx->h, ctx->m, (u32x) ctx->len, BLAKE2S_UPDATE);
+    }
+
+    ctx->m[ 0] = w0[0];
+    ctx->m[ 1] = w0[1];
+    ctx->m[ 2] = w0[2];
+    ctx->m[ 3] = w0[3];
+    ctx->m[ 4] = w1[0];
+    ctx->m[ 5] = w1[1];
+    ctx->m[ 6] = w1[2];
+    ctx->m[ 7] = w1[3];
+    ctx->m[ 8] = w2[0];
+    ctx->m[ 9] = w2[1];
+    ctx->m[10] = w2[2];
+    ctx->m[11] = w2[3];
+    ctx->m[12] = w3[0];
+    ctx->m[13] = w3[1];
+    ctx->m[14] = w3[2];
+    ctx->m[15] = w3[3];
+  }
+  else
+  {
+    if ((pos + len) <= 64)
+    {
+      switch_buffer_by_offset_le (w0, w1, w2, w3, pos);
+      // NOTE(review): assumes the helper moved the new bytes to offset pos and left zeros elsewhere, so OR-ing merges them - confirm
+      ctx->m[ 0] |= w0[0];
+      ctx->m[ 1] |= w0[1];
+      ctx->m[ 2] |= w0[2];
+      ctx->m[ 3] |= w0[3];
+      ctx->m[ 4] |= w1[0];
+      ctx->m[ 5] |= w1[1];
+      ctx->m[ 6] |= w1[2];
+      ctx->m[ 7] |= w1[3];
+      ctx->m[ 8] |= w2[0];
+      ctx->m[ 9] |= w2[1];
+      ctx->m[10] |= w2[2];
+      ctx->m[11] |= w2[3];
+      ctx->m[12] |= w3[0];
+      ctx->m[13] |= w3[1];
+      ctx->m[14] |= w3[2];
+      ctx->m[15] |= w3[3];
+    }
+    else
+    {
+      u32x c0[4] = { 0 };
+      u32x c1[4] = { 0 };
+      u32x c2[4] = { 0 };
+      u32x c3[4] = { 0 };
+      // c0..c3 are handed to the carry helper and become the next buffered block further down
+      switch_buffer_by_offset_carry_le (w0, w1, w2, w3, c0, c1, c2, c3, pos);
+
+      ctx->m[ 0] |= w0[0];
+      ctx->m[ 1] |= w0[1];
+      ctx->m[ 2] |= w0[2];
+      ctx->m[ 3] |= w0[3];
+      ctx->m[ 4] |= w1[0];
+      ctx->m[ 5] |= w1[1];
+      ctx->m[ 6] |= w1[2];
+      ctx->m[ 7] |= w1[3];
+      ctx->m[ 8] |= w2[0];
+      ctx->m[ 9] |= w2[1];
+      ctx->m[10] |= w2[2];
+      ctx->m[11] |= w2[3];
+      ctx->m[12] |= w3[0];
+      ctx->m[13] |= w3[1];
+      ctx->m[14] |= w3[2];
+      ctx->m[15] |= w3[3];
+
+      // len must be a multiple of 64 (not ctx->len) for BLAKE2S_UPDATE:
+
+      const u32x cur_len = ((ctx->len + len) / 64) * 64;
+
+      blake2s_transform_vector (ctx->h, ctx->m, cur_len, BLAKE2S_UPDATE);
+
+      ctx->m[ 0] = c0[0];
+      ctx->m[ 1] = c0[1];
+      ctx->m[ 2] = c0[2];
+      ctx->m[ 3] = c0[3];
+      ctx->m[ 4] = c1[0];
+      ctx->m[ 5] = c1[1];
+      ctx->m[ 6] = c1[2];
+      ctx->m[ 7] = c1[3];
+      ctx->m[ 8] = c2[0];
+      ctx->m[ 9] = c2[1];
+      ctx->m[10] = c2[2];
+      ctx->m[11] = c2[3];
+      ctx->m[12] = c3[0];
+      ctx->m[13] = c3[1];
+      ctx->m[14] = c3[2];
+      ctx->m[15] = c3[3];
+    }
+  }
+
+  ctx->len += len; // running total of absorbed bytes, used as the counter by later transforms
+}
+
+DECLSPEC void blake2s_update_vector (PRIVATE_AS blake2s_ctx_vector_t *ctx, PRIVATE_AS const u32x *w, const int len)
+{
+  u32x w0[4];
+  u32x w1[4];
+  u32x w2[4];
+  u32x w3[4];
+
+  const int full_end = len - 64; // signed on purpose: negative when len is below one block
+
+  int off_bytes; // bytes handed to blake2s_update_vector_64 so far
+  int off_words; // matching index into w[]
+
+  for (off_bytes = 0, off_words = 0; off_bytes < full_end; off_bytes += 64, off_words += 16)
+  {
+    w0[0] = w[off_words +  0];
+    w0[1] = w[off_words +  1];
+    w0[2] = w[off_words +  2];
+    w0[3] = w[off_words +  3];
+    w1[0] = w[off_words +  4];
+    w1[1] = w[off_words +  5];
+    w1[2] = w[off_words +  6];
+    w1[3] = w[off_words +  7];
+    w2[0] = w[off_words +  8];
+    w2[1] = w[off_words +  9];
+    w2[2] = w[off_words + 10];
+    w2[3] = w[off_words + 11];
+    w3[0] = w[off_words + 12];
+    w3[1] = w[off_words + 13];
+    w3[2] = w[off_words + 14];
+    w3[3] = w[off_words + 15];
+    // every block before the last one is passed on as a full 64 bytes
+    blake2s_update_vector_64 (ctx, w0, w1, w2, w3, 64);
+  }
+  // the tail always loads 16 words: assumes w is readable and zero-padded up to that point - TODO confirm for all callers
+  w0[0] = w[off_words +  0];
+  w0[1] = w[off_words +  1];
+  w0[2] = w[off_words +  2];
+  w0[3] = w[off_words +  3];
+  w1[0] = w[off_words +  4];
+  w1[1] = w[off_words +  5];
+  w1[2] = w[off_words +  6];
+  w1[3] = w[off_words +  7];
+  w2[0] = w[off_words +  8];
+  w2[1] = w[off_words +  9];
+  w2[2] = w[off_words + 10];
+  w2[3] = w[off_words + 11];
+  w3[0] = w[off_words + 12];
+  w3[1] = w[off_words + 13];
+  w3[2] = w[off_words + 14];
+  w3[3] = w[off_words + 15];
+
+  blake2s_update_vector_64 (ctx, w0, w1, w2, w3, len - (u32) off_bytes); // whatever is left after the full blocks
+}
+
+DECLSPEC void blake2s_final_vector (PRIVATE_AS blake2s_ctx_vector_t *ctx)
+{
+  blake2s_transform_vector (ctx->h, ctx->m, (u32x) ctx->len, BLAKE2S_FINAL); // compress the buffered block with the final flag and the total byte count; the result is left in ctx->h
+}
diff --git a/OpenCL/inc_hash_blake2s.h b/OpenCL/inc_hash_blake2s.h
new file mode 100644
index 000000000..63f2942f1
--- /dev/null
+++ b/OpenCL/inc_hash_blake2s.h
@@ -0,0 +1,96 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#ifndef INC_HASH_BLAKE2S_H
+#define INC_HASH_BLAKE2S_H
+
+#define BLAKE2S_UPDATE  0 // f0 argument for non-final blocks: xoring it leaves v[14] unchanged
+#define BLAKE2S_FINAL  -1 // f0 argument for the last block: all bits set once converted to the u32 parameter
+
+DECLSPEC u32  blake2s_rot16_S (const u32  a);
+DECLSPEC u32x blake2s_rot16   (const u32x a);
+
+DECLSPEC u32  blake2s_rot08_S (const u32  a);
+DECLSPEC u32x blake2s_rot08   (const u32x a);
+// Scalar G mixing step on a,b,c,d using message words m[k0] and m[k1]; an array named m must exist where the macro expands.
+#define BLAKE2S_G(k0,k1,a,b,c,d) \
+{                                \
+  a = a + b + m[k0];             \
+  d = blake2s_rot16_S (d ^ a);   \
+  c = c + d;                     \
+  b = hc_rotr32_S (b ^ c, 12);   \
+  a = a + b + m[k1];             \
+  d = blake2s_rot08_S (d ^ a);   \
+  c = c + d;                     \
+  b = hc_rotr32_S (b ^ c, 7);    \
+}
+// One scalar round: eight BLAKE2S_G steps over the array v, consuming the m[] indices c0..cf two per step; v and m must exist where the macro expands.
+#define BLAKE2S_ROUND(c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,ca,cb,cc,cd,ce,cf) \
+{                                                                      \
+  BLAKE2S_G (c0, c1, v[0], v[4], v[ 8], v[12]);                        \
+  BLAKE2S_G (c2, c3, v[1], v[5], v[ 9], v[13]);                        \
+  BLAKE2S_G (c4, c5, v[2], v[6], v[10], v[14]);                        \
+  BLAKE2S_G (c6, c7, v[3], v[7], v[11], v[15]);                        \
+  BLAKE2S_G (c8, c9, v[0], v[5], v[10], v[15]);                        \
+  BLAKE2S_G (ca, cb, v[1], v[6], v[11], v[12]);                        \
+  BLAKE2S_G (cc, cd, v[2], v[7], v[ 8], v[13]);                        \
+  BLAKE2S_G (ce, cf, v[3], v[4], v[ 9], v[14]);                        \
+}
+// Same step sequence as BLAKE2S_G, but calling the non-_S helpers (blake2s_rot16, blake2s_rot08, hc_rotr32); an array named m must exist where the macro expands.
+#define BLAKE2S_G_VECTOR(k0,k1,a,b,c,d) \
+{                                       \
+  a = a + b + m[k0];                    \
+  d = blake2s_rot16 (d ^ a);            \
+  c = c + d;                            \
+  b = hc_rotr32 (b ^ c, 12);            \
+  a = a + b + m[k1];                    \
+  d = blake2s_rot08 (d ^ a);            \
+  c = c + d;                            \
+  b = hc_rotr32 (b ^ c, 7);             \
+}
+// One round built from eight BLAKE2S_G_VECTOR steps over the array v, consuming the m[] indices c0..cf two per step; v and m must exist where the macro expands.
+#define BLAKE2S_ROUND_VECTOR(c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,ca,cb,cc,cd,ce,cf) \
+{                                                                             \
+  BLAKE2S_G_VECTOR (c0, c1, v[0], v[4], v[ 8], v[12]);                        \
+  BLAKE2S_G_VECTOR (c2, c3, v[1], v[5], v[ 9], v[13]);                        \
+  BLAKE2S_G_VECTOR (c4, c5, v[2], v[6], v[10], v[14]);                        \
+  BLAKE2S_G_VECTOR (c6, c7, v[3], v[7], v[11], v[15]);                        \
+  BLAKE2S_G_VECTOR (c8, c9, v[0], v[5], v[10], v[15]);                        \
+  BLAKE2S_G_VECTOR (ca, cb, v[1], v[6], v[11], v[12]);                        \
+  BLAKE2S_G_VECTOR (cc, cd, v[2], v[7], v[ 8], v[13]);                        \
+  BLAKE2S_G_VECTOR (ce, cf, v[3], v[4], v[ 9], v[14]);                        \
+}
+
+typedef struct blake2s_ctx
+{
+  u32 m[16]; // buffer: the 64-byte block currently being filled
+  u32 h[ 8]; // digest: chaining value; holds the result after blake2s_final
+
+  int len;   // number of bytes absorbed so far
+
+} blake2s_ctx_t;
+
+typedef struct blake2s_ctx_vector
+{
+  u32x m[16]; // buffer: the 64-byte block currently being filled, one u32x per word
+  u32x h[ 8]; // digest: chaining value; holds the result after blake2s_final_vector
+
+  int len;    // number of bytes absorbed so far (a single scalar count for the whole context)
+
+} blake2s_ctx_vector_t;
+
+DECLSPEC void blake2s_transform (PRIVATE_AS u32 *h, PRIVATE_AS const u32 *m, const int len, const u32 f0);
+DECLSPEC void blake2s_init (PRIVATE_AS blake2s_ctx_t *ctx);
+DECLSPEC void blake2s_update (PRIVATE_AS blake2s_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len);
+DECLSPEC void blake2s_update_global (PRIVATE_AS blake2s_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len);
+DECLSPEC void blake2s_final (PRIVATE_AS blake2s_ctx_t *ctx);
+
+DECLSPEC void blake2s_transform_vector (PRIVATE_AS u32x *h, PRIVATE_AS const u32x *m, const u32x len, const u32 f0);
+DECLSPEC void blake2s_init_vector (PRIVATE_AS blake2s_ctx_vector_t *ctx);
+DECLSPEC void blake2s_init_vector_from_scalar (PRIVATE_AS blake2s_ctx_vector_t *ctx, PRIVATE_AS blake2s_ctx_t *ctx0);
+DECLSPEC void blake2s_update_vector (PRIVATE_AS blake2s_ctx_vector_t *ctx, PRIVATE_AS const u32x *w, const int len);
+DECLSPEC void blake2s_final_vector (PRIVATE_AS blake2s_ctx_vector_t *ctx);
+
+#endif // INC_HASH_BLAKE2S_H
diff --git a/OpenCL/inc_types.h b/OpenCL/inc_types.h
index 0adcbc5df..575b1538f 100644
--- a/OpenCL/inc_types.h
+++ b/OpenCL/inc_types.h
@@ -1666,6 +1666,19 @@ typedef enum blake2b_constants
 
 } blake2b_constants_t;
 
+typedef enum blake2s_constants // BLAKE2s initialization vector words IV[0..7] (see RFC 7693)
+{
+  BLAKE2S_IV_00=0x6a09e667,
+  BLAKE2S_IV_01=0xbb67ae85,
+  BLAKE2S_IV_02=0x3c6ef372,
+  BLAKE2S_IV_03=0xa54ff53a,
+  BLAKE2S_IV_04=0x510e527f,
+  BLAKE2S_IV_05=0x9b05688c,
+  BLAKE2S_IV_06=0x1f83d9ab,
+  BLAKE2S_IV_07=0x5be0cd19
+
+} blake2s_constants_t;
+
 typedef enum combinator_mode
 {
   COMBINATOR_MODE_BASE_LEFT  = 10001,
diff --git a/OpenCL/m31000_a0-optimized.cl b/OpenCL/m31000_a0-optimized.cl
new file mode 100644
index 000000000..fbebe24f4
--- /dev/null
+++ b/OpenCL/m31000_a0-optimized.cl
@@ -0,0 +1,199 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include M2S(INCLUDE_PATH/inc_vendor.h)
+#include M2S(INCLUDE_PATH/inc_types.h)
+#include M2S(INCLUDE_PATH/inc_platform.cl)
+#include M2S(INCLUDE_PATH/inc_common.cl)
+#include M2S(INCLUDE_PATH/inc_rp_optimized.h)
+#include M2S(INCLUDE_PATH/inc_rp_optimized.cl)
+#include M2S(INCLUDE_PATH/inc_simd.cl)
+#include M2S(INCLUDE_PATH/inc_hash_blake2s.cl)
+#endif
+
+KERNEL_FQ void m31000_m04 (KERN_ATTR_RULES ())
+{
+  /**
+   * base: load the first 8 words of this work-item's base word
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  u32 pw_buf0[4];
+  u32 pw_buf1[4];
+
+  pw_buf0[0] = pws[gid].i[0];
+  pw_buf0[1] = pws[gid].i[1];
+  pw_buf0[2] = pws[gid].i[2];
+  pw_buf0[3] = pws[gid].i[3];
+  pw_buf1[0] = pws[gid].i[4];
+  pw_buf1[1] = pws[gid].i[5];
+  pw_buf1[2] = pws[gid].i[6];
+  pw_buf1[3] = pws[gid].i[7];
+
+  const u32 pw_len = pws[gid].pw_len & 63;
+
+  /**
+   * loop: VECT_SIZE rules per iteration, one single-block BLAKE2s each
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+  {
+    // Only two 4-word buffers are needed: the rule engine is handed w0/w1
+    // and the upper half of the message block below is constant zero.
+    u32x w0[4] = { 0 };
+    u32x w1[4] = { 0 };
+
+    const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
+
+    u32x m[16];
+
+    m[ 0] = w0[0];
+    m[ 1] = w0[1];
+    m[ 2] = w0[2];
+    m[ 3] = w0[3];
+    m[ 4] = w1[0];
+    m[ 5] = w1[1];
+    m[ 6] = w1[2];
+    m[ 7] = w1[3];
+    m[ 8] = 0;
+    m[ 9] = 0;
+    m[10] = 0;
+    m[11] = 0;
+    m[12] = 0;
+    m[13] = 0;
+    m[14] = 0;
+    m[15] = 0;
+
+    u32x h[8];
+
+    h[0] = BLAKE2S_IV_00 ^ 0x01010020; // same initial state as blake2s_init_vector
+    h[1] = BLAKE2S_IV_01;
+    h[2] = BLAKE2S_IV_02;
+    h[3] = BLAKE2S_IV_03;
+    h[4] = BLAKE2S_IV_04;
+    h[5] = BLAKE2S_IV_05;
+    h[6] = BLAKE2S_IV_06;
+    h[7] = BLAKE2S_IV_07;
+
+    blake2s_transform_vector (h, m, out_len, BLAKE2S_FINAL); // the candidate is the only and final block
+
+    const u32x r0 = h[DGST_R0];
+    const u32x r1 = h[DGST_R1];
+    const u32x r2 = h[DGST_R2];
+    const u32x r3 = h[DGST_R3];
+
+    COMPARE_M_SIMD (r0, r1, r2, r3);
+  }
+}
+
+KERNEL_FQ void m31000_m08 (KERN_ATTR_RULES ()) // empty body - NOTE(review): presumably only m31000_m04 does work in this file; confirm
+{
+}
+
+KERNEL_FQ void m31000_m16 (KERN_ATTR_RULES ()) // empty body - NOTE(review): presumably only m31000_m04 does work in this file; confirm
+{
+}
+
+KERNEL_FQ void m31000_s04 (KERN_ATTR_RULES ())
+{
+  /**
+   * base: load the first 8 words of this work-item's base word
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  u32 pw_buf0[4];
+  u32 pw_buf1[4];
+
+  pw_buf0[0] = pws[gid].i[0];
+  pw_buf0[1] = pws[gid].i[1];
+  pw_buf0[2] = pws[gid].i[2];
+  pw_buf0[3] = pws[gid].i[3];
+  pw_buf1[0] = pws[gid].i[4];
+  pw_buf1[1] = pws[gid].i[5];
+  pw_buf1[2] = pws[gid].i[6];
+  pw_buf1[3] = pws[gid].i[7];
+
+  const u32 pw_len = pws[gid].pw_len & 63;
+
+  /**
+   * digest: the four compare words of the digest at DIGESTS_OFFSET_HOST
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
+  };
+
+  /**
+   * loop: VECT_SIZE rules per iteration, one single-block BLAKE2s each
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+  {
+    u32x w0[4] = { 0 };
+    u32x w1[4] = { 0 };
+
+    const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);
+
+    u32x m[16];
+
+    m[ 0] = w0[0];
+    m[ 1] = w0[1];
+    m[ 2] = w0[2];
+    m[ 3] = w0[3];
+    m[ 4] = w1[0];
+    m[ 5] = w1[1];
+    m[ 6] = w1[2];
+    m[ 7] = w1[3];
+    m[ 8] = 0;
+    m[ 9] = 0;
+    m[10] = 0;
+    m[11] = 0;
+    m[12] = 0;
+    m[13] = 0;
+    m[14] = 0;
+    m[15] = 0;
+
+    u32x h[8];
+
+    h[0] = BLAKE2S_IV_00 ^ 0x01010020; // same initial state as blake2s_init_vector
+    h[1] = BLAKE2S_IV_01;
+    h[2] = BLAKE2S_IV_02;
+    h[3] = BLAKE2S_IV_03;
+    h[4] = BLAKE2S_IV_04;
+    h[5] = BLAKE2S_IV_05;
+    h[6] = BLAKE2S_IV_06;
+    h[7] = BLAKE2S_IV_07;
+
+    blake2s_transform_vector (h, m, out_len, BLAKE2S_FINAL); // the candidate is the only and final block
+
+    const u32x r0 = h[DGST_R0];
+    const u32x r1 = h[DGST_R1];
+    const u32x r2 = h[DGST_R2];
+    const u32x r3 = h[DGST_R3];
+    // search[] is not referenced by name in this function; presumably COMPARE_S_SIMD reads it - confirm in the macro definition
+    COMPARE_S_SIMD (r0, r1, r2, r3);
+  }
+}
+
+KERNEL_FQ void m31000_s08 (KERN_ATTR_RULES ()) // empty body - NOTE(review): presumably only m31000_s04 does work in this file; confirm
+{
+}
+
+KERNEL_FQ void m31000_s16 (KERN_ATTR_RULES ()) // empty body - NOTE(review): presumably only m31000_s04 does work in this file; confirm
+{
+}
diff --git a/OpenCL/m31000_a0-pure.cl b/OpenCL/m31000_a0-pure.cl
new file mode 100644
index 000000000..d6fdb0a51
--- /dev/null
+++ b/OpenCL/m31000_a0-pure.cl
@@ -0,0 +1,111 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+//#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include M2S(INCLUDE_PATH/inc_vendor.h)
+#include M2S(INCLUDE_PATH/inc_types.h)
+#include M2S(INCLUDE_PATH/inc_platform.cl)
+#include M2S(INCLUDE_PATH/inc_common.cl)
+#include M2S(INCLUDE_PATH/inc_rp.h)
+#include M2S(INCLUDE_PATH/inc_rp.cl)
+#include M2S(INCLUDE_PATH/inc_scalar.cl)
+#include M2S(INCLUDE_PATH/inc_hash_blake2s.cl)
+#endif
+
+KERNEL_FQ void m31000_mxx (KERN_ATTR_RULES ())
+{
+  /**
+   * base: one work-item per pws[] entry; work-items at or past GID_CNT exit
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * base word: COPY_PW / PASTE_PW are macros defined elsewhere; PASTE_PW initialises the per-rule copy below
+   */
+
+  COPY_PW (pws[gid]);
+
+  /**
+   * loop: apply rule il_pos to a fresh copy of the word, hash it, compare digest words DGST_R0..DGST_R3
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  {
+    pw_t tmp = PASTE_PW;
+
+    tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
+
+    blake2s_ctx_t ctx;
+
+    blake2s_init   (&ctx);
+    blake2s_update (&ctx, tmp.i, tmp.pw_len);
+    blake2s_final  (&ctx);
+
+    const u32 r0 = ctx.h[DGST_R0];
+    const u32 r1 = ctx.h[DGST_R1];
+    const u32 r2 = ctx.h[DGST_R2];
+    const u32 r3 = ctx.h[DGST_R3];
+
+    COMPARE_M_SCALAR (r0, r1, r2, r3);
+  }
+}
+
+KERNEL_FQ void m31000_sxx (KERN_ATTR_RULES ())
+{
+  /**
+   * base: one work-item per pws[] entry; work-items at or past GID_CNT exit
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return;
+
+  /**
+   * digest: the four compare words of the digest at DIGESTS_OFFSET_HOST (not referenced by name below; presumably read by COMPARE_S_SCALAR - confirm)
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
+  };
+
+  /**
+   * base word: COPY_PW / PASTE_PW are macros defined elsewhere; PASTE_PW initialises the per-rule copy below
+   */
+
+  COPY_PW (pws[gid]);
+
+  /**
+   * loop: apply rule il_pos to a fresh copy of the word, hash it, compare digest words DGST_R0..DGST_R3
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  {
+    pw_t tmp = PASTE_PW;
+
+    tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
+
+    blake2s_ctx_t ctx;
+
+    blake2s_init   (&ctx);
+    blake2s_update (&ctx, tmp.i, tmp.pw_len);
+    blake2s_final  (&ctx);
+
+    const u32 r0 = ctx.h[DGST_R0];
+    const u32 r1 = ctx.h[DGST_R1];
+    const u32 r2 = ctx.h[DGST_R2];
+    const u32 r3 = ctx.h[DGST_R3];
+
+    COMPARE_S_SCALAR (r0, r1, r2, r3);
+  }
+}
diff --git a/OpenCL/m31000_a1-optimized.cl b/OpenCL/m31000_a1-optimized.cl
new file mode 100644
index 000000000..edc4580de
--- /dev/null
+++ b/OpenCL/m31000_a1-optimized.cl
@@ -0,0 +1,299 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include M2S(INCLUDE_PATH/inc_vendor.h)
+#include M2S(INCLUDE_PATH/inc_types.h)
+#include M2S(INCLUDE_PATH/inc_platform.cl)
+#include M2S(INCLUDE_PATH/inc_common.cl)
+#include M2S(INCLUDE_PATH/inc_simd.cl)
+#include M2S(INCLUDE_PATH/inc_hash_blake2s.cl)
+#endif
+
+KERNEL_FQ void m31000_m04 (KERN_ATTR_BASIC ())
+{
+  /**
+   * optimized combinator kernel: left word from pws[gid], right words from combs_buf, one BLAKE2s block per candidate
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return; // work-items at or beyond GID_CNT have no candidate
+
+  u32 pw_buf0[4]; // only the first 8 words (32 bytes) of the left word are read
+  u32 pw_buf1[4];
+
+  pw_buf0[0] = pws[gid].i[0];
+  pw_buf0[1] = pws[gid].i[1];
+  pw_buf0[2] = pws[gid].i[2];
+  pw_buf0[3] = pws[gid].i[3];
+  pw_buf1[0] = pws[gid].i[4];
+  pw_buf1[1] = pws[gid].i[5];
+  pw_buf1[2] = pws[gid].i[6];
+  pw_buf1[3] = pws[gid].i[7];
+
+  const u32 pw_l_len = pws[gid].pw_len & 63; // mask keeps the length in 0..63
+
+  /**
+   * loop over the right-hand words, VECT_SIZE at a time
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+  {
+    const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63;
+
+    const u32x out_len = pw_l_len + pw_r_len; // byte count passed to the transform below
+
+    /**
+     * concat password candidate: shift one side by the other side's length, then OR both sides together
+     */
+
+    u32x wordl0[4] = { 0 };
+    u32x wordl1[4] = { 0 };
+    u32x wordl2[4] = { 0 };
+    u32x wordl3[4] = { 0 };
+
+    wordl0[0] = pw_buf0[0];
+    wordl0[1] = pw_buf0[1];
+    wordl0[2] = pw_buf0[2];
+    wordl0[3] = pw_buf0[3];
+    wordl1[0] = pw_buf1[0];
+    wordl1[1] = pw_buf1[1];
+    wordl1[2] = pw_buf1[2];
+    wordl1[3] = pw_buf1[3];
+
+    u32x wordr0[4] = { 0 };
+    u32x wordr1[4] = { 0 };
+    u32x wordr2[4] = { 0 };
+    u32x wordr3[4] = { 0 };
+
+    wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
+    wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
+    wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
+    wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
+    wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
+    wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
+    wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
+    wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
+
+    if (COMBS_MODE == COMBINATOR_MODE_BASE_LEFT)
+    {
+      switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); // right word is moved behind the left word
+    }
+    else
+    {
+      switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); // left word is moved behind the right word
+    }
+
+    u32x w0[4]; // only the first 32 bytes of the merged buffers are carried on
+    u32x w1[4];
+
+    w0[0] = wordl0[0] | wordr0[0];
+    w0[1] = wordl0[1] | wordr0[1];
+    w0[2] = wordl0[2] | wordr0[2];
+    w0[3] = wordl0[3] | wordr0[3];
+    w1[0] = wordl1[0] | wordr1[0];
+    w1[1] = wordl1[1] | wordr1[1];
+    w1[2] = wordl1[2] | wordr1[2];
+    w1[3] = wordl1[3] | wordr1[3];
+
+    u32x m[16]; // the single message block; upper half stays zero
+
+    m[ 0] = w0[0];
+    m[ 1] = w0[1];
+    m[ 2] = w0[2];
+    m[ 3] = w0[3];
+    m[ 4] = w1[0];
+    m[ 5] = w1[1];
+    m[ 6] = w1[2];
+    m[ 7] = w1[3];
+    m[ 8] = 0;
+    m[ 9] = 0;
+    m[10] = 0;
+    m[11] = 0;
+    m[12] = 0;
+    m[13] = 0;
+    m[14] = 0;
+    m[15] = 0;
+
+    u32x h[8];
+
+    h[0] = BLAKE2S_IV_00 ^ 0x01010020; // 0x01010020 matches the BLAKE2 parameter word: digest length 32, no key, fanout 1, depth 1 (RFC 7693)
+    h[1] = BLAKE2S_IV_01;
+    h[2] = BLAKE2S_IV_02;
+    h[3] = BLAKE2S_IV_03;
+    h[4] = BLAKE2S_IV_04;
+    h[5] = BLAKE2S_IV_05;
+    h[6] = BLAKE2S_IV_06;
+    h[7] = BLAKE2S_IV_07;
+
+    blake2s_transform_vector (h, m, out_len, BLAKE2S_FINAL); // one call, flagged as the final block
+
+    const u32x r0 = h[DGST_R0];
+    const u32x r1 = h[DGST_R1];
+    const u32x r2 = h[DGST_R2];
+    const u32x r3 = h[DGST_R3];
+
+    COMPARE_M_SIMD (r0, r1, r2, r3);
+  }
+}
+
+KERNEL_FQ void m31000_m08 (KERN_ATTR_BASIC ()) // empty stub: performs no work (presumably never dispatched for this attack mode - TODO confirm)
+{
+}
+
+KERNEL_FQ void m31000_m16 (KERN_ATTR_BASIC ()) // empty stub: performs no work (presumably never dispatched for this attack mode - TODO confirm)
+{
+}
+
+KERNEL_FQ void m31000_s04 (KERN_ATTR_BASIC ())
+{
+  /**
+   * optimized combinator kernel, single-digest variant: compares against the one digest at DIGESTS_OFFSET_HOST
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return; // work-items at or beyond GID_CNT have no candidate
+
+  u32 pw_buf0[4]; // only the first 8 words (32 bytes) of the left word are read
+  u32 pw_buf1[4];
+
+  pw_buf0[0] = pws[gid].i[0];
+  pw_buf0[1] = pws[gid].i[1];
+  pw_buf0[2] = pws[gid].i[2];
+  pw_buf0[3] = pws[gid].i[3];
+  pw_buf1[0] = pws[gid].i[4];
+  pw_buf1[1] = pws[gid].i[5];
+  pw_buf1[2] = pws[gid].i[6];
+  pw_buf1[3] = pws[gid].i[7];
+
+  const u32 pw_l_len = pws[gid].pw_len & 63; // mask keeps the length in 0..63
+
+  /**
+   * digest: the four target words, named search[] because COMPARE_S_SIMD is not passed them explicitly
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
+  };
+
+  /**
+   * loop over the right-hand words, VECT_SIZE at a time
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+  {
+    const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63;
+
+    const u32x out_len = pw_l_len + pw_r_len; // byte count passed to the transform below
+
+    /**
+     * concat password candidate: shift one side by the other side's length, then OR both sides together
+     */
+
+    u32x wordl0[4] = { 0 };
+    u32x wordl1[4] = { 0 };
+    u32x wordl2[4] = { 0 };
+    u32x wordl3[4] = { 0 };
+
+    wordl0[0] = pw_buf0[0];
+    wordl0[1] = pw_buf0[1];
+    wordl0[2] = pw_buf0[2];
+    wordl0[3] = pw_buf0[3];
+    wordl1[0] = pw_buf1[0];
+    wordl1[1] = pw_buf1[1];
+    wordl1[2] = pw_buf1[2];
+    wordl1[3] = pw_buf1[3];
+
+    u32x wordr0[4] = { 0 };
+    u32x wordr1[4] = { 0 };
+    u32x wordr2[4] = { 0 };
+    u32x wordr3[4] = { 0 };
+
+    wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
+    wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
+    wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
+    wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
+    wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
+    wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
+    wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
+    wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
+
+    if (COMBS_MODE == COMBINATOR_MODE_BASE_LEFT)
+    {
+      switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len); // right word is moved behind the left word
+    }
+    else
+    {
+      switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len); // left word is moved behind the right word
+    }
+
+    u32x w0[4]; // only the first 32 bytes of the merged buffers are carried on
+    u32x w1[4];
+
+    w0[0] = wordl0[0] | wordr0[0];
+    w0[1] = wordl0[1] | wordr0[1];
+    w0[2] = wordl0[2] | wordr0[2];
+    w0[3] = wordl0[3] | wordr0[3];
+    w1[0] = wordl1[0] | wordr1[0];
+    w1[1] = wordl1[1] | wordr1[1];
+    w1[2] = wordl1[2] | wordr1[2];
+    w1[3] = wordl1[3] | wordr1[3];
+
+    u32x m[16]; // the single message block; upper half stays zero
+
+    m[ 0] = w0[0];
+    m[ 1] = w0[1];
+    m[ 2] = w0[2];
+    m[ 3] = w0[3];
+    m[ 4] = w1[0];
+    m[ 5] = w1[1];
+    m[ 6] = w1[2];
+    m[ 7] = w1[3];
+    m[ 8] = 0;
+    m[ 9] = 0;
+    m[10] = 0;
+    m[11] = 0;
+    m[12] = 0;
+    m[13] = 0;
+    m[14] = 0;
+    m[15] = 0;
+
+    u32x h[8];
+
+    h[0] = BLAKE2S_IV_00 ^ 0x01010020; // 0x01010020 matches the BLAKE2 parameter word: digest length 32, no key, fanout 1, depth 1 (RFC 7693)
+    h[1] = BLAKE2S_IV_01;
+    h[2] = BLAKE2S_IV_02;
+    h[3] = BLAKE2S_IV_03;
+    h[4] = BLAKE2S_IV_04;
+    h[5] = BLAKE2S_IV_05;
+    h[6] = BLAKE2S_IV_06;
+    h[7] = BLAKE2S_IV_07;
+
+    blake2s_transform_vector (h, m, out_len, BLAKE2S_FINAL); // one call, flagged as the final block
+
+    const u32x r0 = h[DGST_R0];
+    const u32x r1 = h[DGST_R1];
+    const u32x r2 = h[DGST_R2];
+    const u32x r3 = h[DGST_R3];
+
+    COMPARE_S_SIMD (r0, r1, r2, r3);
+  }
+}
+
+KERNEL_FQ void m31000_s08 (KERN_ATTR_BASIC ()) // empty stub: performs no work (presumably never dispatched for this attack mode - TODO confirm)
+{
+}
+
+KERNEL_FQ void m31000_s16 (KERN_ATTR_BASIC ()) // empty stub: performs no work (presumably never dispatched for this attack mode - TODO confirm)
+{
+}
diff --git a/OpenCL/m31000_a1-pure.cl b/OpenCL/m31000_a1-pure.cl
new file mode 100644
index 000000000..e90aa0d46
--- /dev/null
+++ b/OpenCL/m31000_a1-pure.cl
@@ -0,0 +1,109 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+//#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include M2S(INCLUDE_PATH/inc_vendor.h)
+#include M2S(INCLUDE_PATH/inc_types.h)
+#include M2S(INCLUDE_PATH/inc_platform.cl)
+#include M2S(INCLUDE_PATH/inc_common.cl)
+#include M2S(INCLUDE_PATH/inc_scalar.cl)
+#include M2S(INCLUDE_PATH/inc_hash_blake2s.cl)
+#endif
+
+KERNEL_FQ void m31000_mxx (KERN_ATTR_BASIC ())
+{
+  /**
+   * pure combinator kernel: BLAKE2s over pws[gid] followed by each combs_buf word, result goes to COMPARE_M_SCALAR
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return; // work-items at or beyond GID_CNT have no candidate
+
+  /**
+   * base: absorb the left word once, outside the loop
+   */
+
+  blake2s_ctx_t ctx0;
+
+  blake2s_init (&ctx0);
+
+  blake2s_update_global (&ctx0, pws[gid].i, pws[gid].pw_len);
+
+  /**
+   * loop over the IL_CNT right-hand words
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  {
+    blake2s_ctx_t ctx = ctx0; // start from the saved left-word state, ctx0 itself is never modified here
+
+    blake2s_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
+
+    blake2s_final (&ctx);
+
+    const u32 r0 = ctx.h[DGST_R0]; // only the four state words selected by DGST_R0..DGST_R3 are compared here
+    const u32 r1 = ctx.h[DGST_R1];
+    const u32 r2 = ctx.h[DGST_R2];
+    const u32 r3 = ctx.h[DGST_R3];
+
+    COMPARE_M_SCALAR (r0, r1, r2, r3);
+  }
+}
+
+KERNEL_FQ void m31000_sxx (KERN_ATTR_BASIC ())
+{
+  /**
+   * pure combinator kernel, single-digest variant: compares against the one digest at DIGESTS_OFFSET_HOST
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return; // work-items at or beyond GID_CNT have no candidate
+
+  /**
+   * digest: the four target words, named search[] because COMPARE_S_SCALAR is not passed them explicitly
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
+  };
+
+  /**
+   * base: absorb the left word once, outside the loop
+   */
+
+  blake2s_ctx_t ctx0;
+
+  blake2s_init (&ctx0);
+
+  blake2s_update_global (&ctx0, pws[gid].i, pws[gid].pw_len);
+
+  /**
+   * loop over the IL_CNT right-hand words
+   */
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
+  {
+    blake2s_ctx_t ctx = ctx0; // start from the saved left-word state, ctx0 itself is never modified here
+
+    blake2s_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
+
+    blake2s_final (&ctx);
+
+    const u32 r0 = ctx.h[DGST_R0]; // same word selection as used to fill search[]
+    const u32 r1 = ctx.h[DGST_R1];
+    const u32 r2 = ctx.h[DGST_R2];
+    const u32 r3 = ctx.h[DGST_R3];
+
+    COMPARE_S_SCALAR (r0, r1, r2, r3);
+  }
+}
diff --git a/OpenCL/m31000_a3-optimized.cl b/OpenCL/m31000_a3-optimized.cl
new file mode 100644
index 000000000..876542c05
--- /dev/null
+++ b/OpenCL/m31000_a3-optimized.cl
@@ -0,0 +1,407 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include M2S(INCLUDE_PATH/inc_vendor.h)
+#include M2S(INCLUDE_PATH/inc_types.h)
+#include M2S(INCLUDE_PATH/inc_platform.cl)
+#include M2S(INCLUDE_PATH/inc_common.cl)
+#include M2S(INCLUDE_PATH/inc_simd.cl)
+#include M2S(INCLUDE_PATH/inc_hash_blake2s.cl)
+#endif
+
+DECLSPEC void m31000m (PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTOR ())
+{
+  /**
+   * BLAKE2s-256 worker behind the m31000_m04/_m08/_m16 mask kernels, compares via COMPARE_M_SIMD.
+   *
+   * w      : 16 message words of the base candidate; every caller zeroes the
+   *          words it does not load, so all 16 can be hashed unconditionally
+   * pw_len : candidate length in bytes, already masked with & 63 by the caller
+   * others : kernel buffers forwarded unchanged (KERN_ATTR_FUNC_VECTOR)
+   */
+
+  /**
+   * loop
+   */
+
+  u32 w0l = w[0];
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+  {
+    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+    const u32x w0x = w0l | w0r;
+
+    // The candidate always fits one 64-byte block, so every word of w[] is
+    // forwarded. Words 8..15 are zero for the m04/m08 callers (digest is
+    // unchanged for them) and carry bytes 32..63 for the m16 caller, which a
+    // hard-wired zero upper half would silently drop.
+
+    u32x m[16];
+
+    m[ 0] = w0x;
+    m[ 1] = w[ 1];
+    m[ 2] = w[ 2];
+    m[ 3] = w[ 3];
+    m[ 4] = w[ 4];
+    m[ 5] = w[ 5];
+    m[ 6] = w[ 6];
+    m[ 7] = w[ 7];
+    m[ 8] = w[ 8];
+    m[ 9] = w[ 9];
+    m[10] = w[10];
+    m[11] = w[11];
+    m[12] = w[12];
+    m[13] = w[13];
+    m[14] = w[14];
+    m[15] = w[15];
+
+    u32x h[8];
+
+    // h[0] is IV[0] xored with the BLAKE2 parameter word 0x01010020:
+    // digest length 32, key length 0, fanout 1, depth 1 (RFC 7693)
+    h[0] = BLAKE2S_IV_00 ^ 0x01010020;
+    h[1] = BLAKE2S_IV_01;
+    h[2] = BLAKE2S_IV_02;
+    h[3] = BLAKE2S_IV_03;
+    h[4] = BLAKE2S_IV_04;
+    h[5] = BLAKE2S_IV_05;
+    h[6] = BLAKE2S_IV_06;
+    h[7] = BLAKE2S_IV_07;
+
+    blake2s_transform_vector (h, m, pw_len, BLAKE2S_FINAL);
+
+    const u32x r0 = h[DGST_R0];
+    const u32x r1 = h[DGST_R1];
+    const u32x r2 = h[DGST_R2];
+    const u32x r3 = h[DGST_R3];
+
+    COMPARE_M_SIMD (r0, r1, r2, r3);
+  }
+}
+
+DECLSPEC void m31000s (PRIVATE_AS u32 *w, const u32 pw_len, KERN_ATTR_FUNC_VECTOR ())
+{
+  /**
+   * BLAKE2s-256 worker behind the m31000_s04/_s08/_s16 mask kernels, compares via COMPARE_S_SIMD.
+   *
+   * w      : 16 message words of the base candidate; every caller zeroes the
+   *          words it does not load, so all 16 can be hashed unconditionally
+   * pw_len : candidate length in bytes, already masked with & 63 by the caller
+   * others : kernel buffers forwarded unchanged (KERN_ATTR_FUNC_VECTOR)
+   */
+
+  /**
+   * digest: the four target words of the single digest at DIGESTS_OFFSET_HOST
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
+  };
+
+  /**
+   * loop
+   */
+
+  u32 w0l = w[0];
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+  {
+    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+    const u32x w0x = w0l | w0r;
+
+    // The candidate always fits one 64-byte block, so every word of w[] is
+    // forwarded. Words 8..15 are zero for the s04/s08 callers (digest is
+    // unchanged for them) and carry bytes 32..63 for the s16 caller, which a
+    // hard-wired zero upper half would silently drop.
+
+    u32x m[16];
+
+    m[ 0] = w0x;
+    m[ 1] = w[ 1];
+    m[ 2] = w[ 2];
+    m[ 3] = w[ 3];
+    m[ 4] = w[ 4];
+    m[ 5] = w[ 5];
+    m[ 6] = w[ 6];
+    m[ 7] = w[ 7];
+    m[ 8] = w[ 8];
+    m[ 9] = w[ 9];
+    m[10] = w[10];
+    m[11] = w[11];
+    m[12] = w[12];
+    m[13] = w[13];
+    m[14] = w[14];
+    m[15] = w[15];
+
+    u32x h[8];
+
+    // h[0] is IV[0] xored with the BLAKE2 parameter word 0x01010020:
+    // digest length 32, key length 0, fanout 1, depth 1 (RFC 7693)
+    h[0] = BLAKE2S_IV_00 ^ 0x01010020;
+    h[1] = BLAKE2S_IV_01;
+    h[2] = BLAKE2S_IV_02;
+    h[3] = BLAKE2S_IV_03;
+    h[4] = BLAKE2S_IV_04;
+    h[5] = BLAKE2S_IV_05;
+    h[6] = BLAKE2S_IV_06;
+    h[7] = BLAKE2S_IV_07;
+
+    blake2s_transform_vector (h, m, pw_len, BLAKE2S_FINAL);
+
+    const u32x r0 = h[DGST_R0];
+    const u32x r1 = h[DGST_R1];
+    const u32x r2 = h[DGST_R2];
+    const u32x r3 = h[DGST_R3];
+
+    COMPARE_S_SIMD (r0, r1, r2, r3);
+  }
+}
+
+KERNEL_FQ void m31000_m04 (KERN_ATTR_VECTOR ())
+{
+  /**
+   * mask kernel entry point: loads the first 4 message words (16 bytes) of the base word, the rest stay zero
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+  const u64 lsz = get_local_size (0);
+
+  if (gid >= GID_CNT) return; // work-items at or beyond GID_CNT have no candidate
+
+  u32 w[16];
+
+  w[ 0] = pws[gid].i[ 0];
+  w[ 1] = pws[gid].i[ 1];
+  w[ 2] = pws[gid].i[ 2];
+  w[ 3] = pws[gid].i[ 3];
+  w[ 4] = 0;
+  w[ 5] = 0;
+  w[ 6] = 0;
+  w[ 7] = 0;
+  w[ 8] = 0;
+  w[ 9] = 0;
+  w[10] = 0;
+  w[11] = 0;
+  w[12] = 0;
+  w[13] = 0;
+  w[14] = 0;
+  w[15] = 0;
+
+  const u32 pw_len = pws[gid].pw_len & 63; // mask keeps the length in 0..63
+
+  /**
+   * main: hashing and comparison happen in the shared worker m31000m
+   */
+
+  m31000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
+}
+
+KERNEL_FQ void m31000_m08 (KERN_ATTR_VECTOR ())
+{
+  /**
+   * mask kernel entry point: loads the first 8 message words (32 bytes) of the base word, the rest stay zero
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+  const u64 lsz = get_local_size (0);
+
+  if (gid >= GID_CNT) return; // work-items at or beyond GID_CNT have no candidate
+
+  u32 w[16];
+
+  w[ 0] = pws[gid].i[ 0];
+  w[ 1] = pws[gid].i[ 1];
+  w[ 2] = pws[gid].i[ 2];
+  w[ 3] = pws[gid].i[ 3];
+  w[ 4] = pws[gid].i[ 4];
+  w[ 5] = pws[gid].i[ 5];
+  w[ 6] = pws[gid].i[ 6];
+  w[ 7] = pws[gid].i[ 7];
+  w[ 8] = 0;
+  w[ 9] = 0;
+  w[10] = 0;
+  w[11] = 0;
+  w[12] = 0;
+  w[13] = 0;
+  w[14] = 0;
+  w[15] = 0;
+
+  const u32 pw_len = pws[gid].pw_len & 63; // mask keeps the length in 0..63
+
+  /**
+   * main: hashing and comparison happen in the shared worker m31000m
+   */
+
+  m31000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
+}
+
+KERNEL_FQ void m31000_m16 (KERN_ATTR_VECTOR ())
+{
+  /**
+   * mask kernel entry point: loads all 16 message words (64 bytes) of the base word
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+  const u64 lsz = get_local_size (0);
+
+  if (gid >= GID_CNT) return; // work-items at or beyond GID_CNT have no candidate
+
+  u32 w[16];
+
+  w[ 0] = pws[gid].i[ 0];
+  w[ 1] = pws[gid].i[ 1];
+  w[ 2] = pws[gid].i[ 2];
+  w[ 3] = pws[gid].i[ 3];
+  w[ 4] = pws[gid].i[ 4];
+  w[ 5] = pws[gid].i[ 5];
+  w[ 6] = pws[gid].i[ 6];
+  w[ 7] = pws[gid].i[ 7];
+  w[ 8] = pws[gid].i[ 8];
+  w[ 9] = pws[gid].i[ 9];
+  w[10] = pws[gid].i[10];
+  w[11] = pws[gid].i[11];
+  w[12] = pws[gid].i[12];
+  w[13] = pws[gid].i[13];
+  w[14] = pws[gid].i[14];
+  w[15] = pws[gid].i[15];
+
+  const u32 pw_len = pws[gid].pw_len & 63; // mask keeps the length in 0..63
+
+  /**
+   * main: hashing and comparison happen in the shared worker m31000m
+   */
+
+  m31000m (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
+}
+
+KERNEL_FQ void m31000_s04 (KERN_ATTR_VECTOR ())
+{
+  /**
+   * mask kernel entry point: loads the first 4 message words (16 bytes) of the base word, the rest stay zero
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+  const u64 lsz = get_local_size (0);
+
+  if (gid >= GID_CNT) return; // work-items at or beyond GID_CNT have no candidate
+
+  u32 w[16];
+
+  w[ 0] = pws[gid].i[ 0];
+  w[ 1] = pws[gid].i[ 1];
+  w[ 2] = pws[gid].i[ 2];
+  w[ 3] = pws[gid].i[ 3];
+  w[ 4] = 0;
+  w[ 5] = 0;
+  w[ 6] = 0;
+  w[ 7] = 0;
+  w[ 8] = 0;
+  w[ 9] = 0;
+  w[10] = 0;
+  w[11] = 0;
+  w[12] = 0;
+  w[13] = 0;
+  w[14] = 0;
+  w[15] = 0;
+
+  const u32 pw_len = pws[gid].pw_len & 63; // mask keeps the length in 0..63
+
+  /**
+   * main: hashing and comparison happen in the shared worker m31000s
+   */
+
+  m31000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
+}
+
+KERNEL_FQ void m31000_s08 (KERN_ATTR_VECTOR ())
+{
+  /**
+   * mask kernel entry point: loads the first 8 message words (32 bytes) of the base word, the rest stay zero
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+  const u64 lsz = get_local_size (0);
+
+  if (gid >= GID_CNT) return; // work-items at or beyond GID_CNT have no candidate
+
+  u32 w[16];
+
+  w[ 0] = pws[gid].i[ 0];
+  w[ 1] = pws[gid].i[ 1];
+  w[ 2] = pws[gid].i[ 2];
+  w[ 3] = pws[gid].i[ 3];
+  w[ 4] = pws[gid].i[ 4];
+  w[ 5] = pws[gid].i[ 5];
+  w[ 6] = pws[gid].i[ 6];
+  w[ 7] = pws[gid].i[ 7];
+  w[ 8] = 0;
+  w[ 9] = 0;
+  w[10] = 0;
+  w[11] = 0;
+  w[12] = 0;
+  w[13] = 0;
+  w[14] = 0;
+  w[15] = 0;
+
+  const u32 pw_len = pws[gid].pw_len & 63; // mask keeps the length in 0..63
+
+  /**
+   * main: hashing and comparison happen in the shared worker m31000s
+   */
+
+  m31000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
+}
+
+KERNEL_FQ void m31000_s16 (KERN_ATTR_VECTOR ())
+{
+  /**
+   * mask kernel entry point: loads all 16 message words (64 bytes) of the base word
+   */
+
+  const u64 lid = get_local_id (0);
+  const u64 gid = get_global_id (0);
+  const u64 lsz = get_local_size (0);
+
+  if (gid >= GID_CNT) return; // work-items at or beyond GID_CNT have no candidate
+
+  u32 w[16];
+
+  w[ 0] = pws[gid].i[ 0];
+  w[ 1] = pws[gid].i[ 1];
+  w[ 2] = pws[gid].i[ 2];
+  w[ 3] = pws[gid].i[ 3];
+  w[ 4] = pws[gid].i[ 4];
+  w[ 5] = pws[gid].i[ 5];
+  w[ 6] = pws[gid].i[ 6];
+  w[ 7] = pws[gid].i[ 7];
+  w[ 8] = pws[gid].i[ 8];
+  w[ 9] = pws[gid].i[ 9];
+  w[10] = pws[gid].i[10];
+  w[11] = pws[gid].i[11];
+  w[12] = pws[gid].i[12];
+  w[13] = pws[gid].i[13];
+  w[14] = pws[gid].i[14];
+  w[15] = pws[gid].i[15];
+
+  const u32 pw_len = pws[gid].pw_len & 63; // mask keeps the length in 0..63
+
+  /**
+   * main: hashing and comparison happen in the shared worker m31000s
+   */
+
+  m31000s (w, pw_len, pws, rules_buf, combs_buf, words_buf_r, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
+}
diff --git a/OpenCL/m31000_a3-pure.cl b/OpenCL/m31000_a3-pure.cl
new file mode 100644
index 000000000..24ce691db
--- /dev/null
+++ b/OpenCL/m31000_a3-pure.cl
@@ -0,0 +1,131 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#define NEW_SIMD_CODE
+
+#ifdef KERNEL_STATIC
+#include M2S(INCLUDE_PATH/inc_vendor.h)
+#include M2S(INCLUDE_PATH/inc_types.h)
+#include M2S(INCLUDE_PATH/inc_platform.cl)
+#include M2S(INCLUDE_PATH/inc_common.cl)
+#include M2S(INCLUDE_PATH/inc_simd.cl)
+#include M2S(INCLUDE_PATH/inc_hash_blake2s.cl)
+#endif
+
+KERNEL_FQ void m31000_mxx (KERN_ATTR_VECTOR ())
+{
+  /**
+   * pure mask kernel: word 0 of the base word is combined with each words_buf_r entry, then hashed with BLAKE2s
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return; // work-items at or beyond GID_CNT have no candidate
+
+  /**
+   * base: copy the base word into a zero-initialised private buffer
+   */
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  u32x w[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) // one word per 4 password bytes
+  {
+    w[idx] = pws[gid].i[idx];
+  }
+
+  /**
+   * loop over words_buf_r, VECT_SIZE candidates per iteration
+   */
+
+  u32x w0l = w[0]; // word 0 as loaded, before any words_buf_r value is ORed in
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+  {
+    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+
+    const u32x w0 = w0l | w0r;
+
+    w[0] = w0; // only word 0 changes between iterations
+
+    blake2s_ctx_vector_t ctx;
+
+    blake2s_init_vector   (&ctx);
+    blake2s_update_vector (&ctx, w, pw_len);
+    blake2s_final_vector  (&ctx);
+
+    const u32x r0 = ctx.h[DGST_R0]; // only the four state words selected by DGST_R0..DGST_R3 are compared here
+    const u32x r1 = ctx.h[DGST_R1];
+    const u32x r2 = ctx.h[DGST_R2];
+    const u32x r3 = ctx.h[DGST_R3];
+
+    COMPARE_M_SIMD (r0, r1, r2, r3);
+  }
+}
+
+KERNEL_FQ void m31000_sxx (KERN_ATTR_VECTOR ())
+{
+  /**
+   * pure mask kernel, single-digest variant: compares against the one digest at DIGESTS_OFFSET_HOST
+   */
+
+  const u64 gid = get_global_id (0);
+
+  if (gid >= GID_CNT) return; // work-items at or beyond GID_CNT have no candidate
+
+  /**
+   * digest: the four target words, named search[] because COMPARE_S_SIMD is not passed them explicitly
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
+    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
+  };
+
+  /**
+   * base: copy the base word into a zero-initialised private buffer
+   */
+
+  const u32 pw_len = pws[gid].pw_len;
+
+  u32x w[64] = { 0 };
+
+  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) // one word per 4 password bytes
+  {
+    w[idx] = pws[gid].i[idx];
+  }
+
+  /**
+   * loop over words_buf_r, VECT_SIZE candidates per iteration
+   */
+
+  u32x w0l = w[0]; // word 0 as loaded, before any words_buf_r value is ORed in
+
+  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
+  {
+    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
+
+    const u32x w0 = w0l | w0r;
+
+    w[0] = w0; // only word 0 changes between iterations
+
+    blake2s_ctx_vector_t ctx;
+
+    blake2s_init_vector   (&ctx);
+    blake2s_update_vector (&ctx, w, pw_len);
+    blake2s_final_vector  (&ctx);
+
+    const u32x r0 = ctx.h[DGST_R0]; // same word selection as used to fill search[]
+    const u32x r1 = ctx.h[DGST_R1];
+    const u32x r2 = ctx.h[DGST_R2];
+    const u32x r3 = ctx.h[DGST_R3];
+
+    COMPARE_S_SIMD (r0, r1, r2, r3);
+  }
+}
diff --git a/docs/readme.txt b/docs/readme.txt
index 289d3e935..d2b57eb6e 100644
--- a/docs/readme.txt
+++ b/docs/readme.txt
@@ -58,6 +58,7 @@ NVIDIA GPUs require "NVIDIA Driver" (440.64 or later) and "CUDA Toolkit" (9.0 or
 - SHA3-512
 - RIPEMD-160
 - BLAKE2b-512
+- BLAKE2s-256
 - GOST R 34.11-2012 (Streebog) 256-bit, big-endian
 - GOST R 34.11-2012 (Streebog) 512-bit, big-endian
 - GOST R 34.11-94
diff --git a/src/modules/module_31000.c b/src/modules/module_31000.c
new file mode 100644
index 000000000..cf35180ac
--- /dev/null
+++ b/src/modules/module_31000.c
@@ -0,0 +1,247 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#include "common.h"
+#include "types.h"
+#include "modules.h"
+#include "bitops.h"
+#include "convert.h"
+#include "shared.h"
+
+static const u32 ATTACK_EXEC = ATTACK_EXEC_INSIDE_KERNEL; // static description of hash mode 31000, handed out by the getters below
+static const u32 DGST_POS0 = 1; // DGST_POS0..3: presumably the digest word indices behind DGST_R0..R3 in the kernels - TODO confirm
+static const u32 DGST_POS1 = 0;
+static const u32 DGST_POS2 = 3;
+static const u32 DGST_POS3 = 2;
+static const u32 DGST_SIZE = DGST_SIZE_4_8; // module_hash_decode fills eight u32 digest words
+static const u32 HASH_CATEGORY = HASH_CATEGORY_RAW_HASH;
+static const char *HASH_NAME = "BLAKE2s-256";
+static const u64 KERN_TYPE = 31000; // same number as in the m31000_* kernel file names
+static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_USES_BITS_32 | OPTI_TYPE_RAW_HASH;
+static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE | OPTS_TYPE_PT_GENERATE_LE;
+static const u32 SALT_TYPE = SALT_TYPE_NONE;
+static const char *ST_PASS = "hashcat"; // self-test plaintext; ST_HASH is expected to be its encoded digest
+static const char *ST_HASH = "$BLAKE2$2c719b484789ad5f6fc1739012182169b25484af156adc91d4f64f72400e574a";
+
+u32 module_attack_exec (MAYBE_UNUSED const hashconfig_t * hashconfig, MAYBE_UNUSED const user_options_t * user_options, MAYBE_UNUSED const user_options_extra_t * user_options_extra)
+{
+  return ATTACK_EXEC; // constant getter (ATTACK_EXEC_INSIDE_KERNEL); all three arguments are ignored
+}
+
+u32 module_dgst_pos0 (MAYBE_UNUSED const hashconfig_t * hashconfig, MAYBE_UNUSED const user_options_t * user_options, MAYBE_UNUSED const user_options_extra_t * user_options_extra)
+{
+  return DGST_POS0; // constant getter (1); all three arguments are ignored
+}
+
+u32 module_dgst_pos1 (MAYBE_UNUSED const hashconfig_t * hashconfig, MAYBE_UNUSED const user_options_t * user_options, MAYBE_UNUSED const user_options_extra_t * user_options_extra)
+{
+  return DGST_POS1; // constant getter (0); all three arguments are ignored
+}
+
+u32 module_dgst_pos2 (MAYBE_UNUSED const hashconfig_t * hashconfig, MAYBE_UNUSED const user_options_t * user_options, MAYBE_UNUSED const user_options_extra_t * user_options_extra)
+{
+  return DGST_POS2; // constant getter (3); all three arguments are ignored
+}
+
+u32 module_dgst_pos3 (MAYBE_UNUSED const hashconfig_t * hashconfig, MAYBE_UNUSED const user_options_t * user_options, MAYBE_UNUSED const user_options_extra_t * user_options_extra)
+{
+  return DGST_POS3; // constant getter (2); all three arguments are ignored
+}
+
+u32 module_dgst_size (MAYBE_UNUSED const hashconfig_t * hashconfig, MAYBE_UNUSED const user_options_t * user_options, MAYBE_UNUSED const user_options_extra_t * user_options_extra)
+{
+  return DGST_SIZE; // constant getter (DGST_SIZE_4_8); all three arguments are ignored
+}
+
+u32 module_hash_category (MAYBE_UNUSED const hashconfig_t * hashconfig, MAYBE_UNUSED const user_options_t * user_options, MAYBE_UNUSED const user_options_extra_t * user_options_extra)
+{
+  return HASH_CATEGORY; // constant getter (HASH_CATEGORY_RAW_HASH); all three arguments are ignored
+}
+
+const char *module_hash_name (MAYBE_UNUSED const hashconfig_t * hashconfig, MAYBE_UNUSED const user_options_t * user_options, MAYBE_UNUSED const user_options_extra_t * user_options_extra)
+{
+  return HASH_NAME; // constant getter ("BLAKE2s-256"), points at static storage; all three arguments are ignored
+}
+
+u64 module_kern_type (MAYBE_UNUSED const hashconfig_t * hashconfig, MAYBE_UNUSED const user_options_t * user_options, MAYBE_UNUSED const user_options_extra_t * user_options_extra)
+{
+  return KERN_TYPE; // constant getter (31000); all three arguments are ignored
+}
+
+u32 module_opti_type (MAYBE_UNUSED const hashconfig_t * hashconfig, MAYBE_UNUSED const user_options_t * user_options, MAYBE_UNUSED const user_options_extra_t * user_options_extra)
+{
+  return OPTI_TYPE; // constant getter (OPTI_TYPE_* bit mask defined above); all three arguments are ignored
+}
+
+u64 module_opts_type (MAYBE_UNUSED const hashconfig_t * hashconfig, MAYBE_UNUSED const user_options_t * user_options, MAYBE_UNUSED const user_options_extra_t * user_options_extra)
+{
+  return OPTS_TYPE; // constant getter (OPTS_TYPE_* bit mask defined above); all three arguments are ignored
+}
+
+u32 module_salt_type (MAYBE_UNUSED const hashconfig_t * hashconfig, MAYBE_UNUSED const user_options_t * user_options, MAYBE_UNUSED const user_options_extra_t * user_options_extra)
+{
+  return SALT_TYPE; // constant getter (SALT_TYPE_NONE); all three arguments are ignored
+}
+
+const char *module_st_hash (MAYBE_UNUSED const hashconfig_t * hashconfig, MAYBE_UNUSED const user_options_t * user_options, MAYBE_UNUSED const user_options_extra_t * user_options_extra)
+{
+  return ST_HASH; // constant getter (self-test hash line), points at static storage; all three arguments are ignored
+}
+
+const char *module_st_pass (MAYBE_UNUSED const hashconfig_t * hashconfig, MAYBE_UNUSED const user_options_t * user_options, MAYBE_UNUSED const user_options_extra_t * user_options_extra)
+{
+  return ST_PASS; // constant getter (self-test plaintext "hashcat"), points at static storage; all three arguments are ignored
+}
+
+static const char *SIGNATURE_BLAKE2S = "$BLAKE2$"; // 8-character prefix checked by module_hash_decode and written by module_hash_encode
+
+int module_hash_decode (MAYBE_UNUSED const hashconfig_t * hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t * salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t * hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
+{
+  // Parses "$BLAKE2$" followed by 64 hex characters from line_buf into the eight u32 words
+  // of digest_buf. Returns PARSER_OK, or the input_tokenizer () error code unchanged.
+
+  u32 *digest = (u32 *) digest_buf;
+
+  hc_token_t token;
+
+  memset (&token, 0, sizeof (hc_token_t)); // only some fields are set below; the rest must not reach the tokenizer indeterminate
+
+  token.token_cnt = 2;
+
+  token.signatures_cnt = 1;
+  token.signatures_buf[0] = SIGNATURE_BLAKE2S;
+
+  token.len[0] = 8; // length of the "$BLAKE2$" signature
+  token.attr[0] = TOKEN_ATTR_FIXED_LENGTH | TOKEN_ATTR_VERIFY_SIGNATURE;
+
+  token.len[1] = 64; // 8 digest words x 8 hex characters
+  token.attr[1] = TOKEN_ATTR_FIXED_LENGTH | TOKEN_ATTR_VERIFY_HEX;
+
+  const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token);
+
+  if (rc_tokenizer != PARSER_OK) return (rc_tokenizer);
+
+  const u8 *hash_pos = token.buf[1];
+
+  for (int i = 0; i < 8; i++)
+  {
+    digest[i] = hex_to_u32 (hash_pos + (i * 8)); // word i starts 8 hex characters after word i - 1
+  }
+
+  return (PARSER_OK);
+}
+
+int module_hash_encode (MAYBE_UNUSED const hashconfig_t * hashconfig, MAYBE_UNUSED const void *digest_buf, MAYBE_UNUSED const salt_t * salt, MAYBE_UNUSED const void *esalt_buf, MAYBE_UNUSED const void *hook_salt_buf, MAYBE_UNUSED const hashinfo_t * hash_info, char *line_buf, MAYBE_UNUSED const int line_size)
+{
+  /**
+   * Serialises the digest as "$BLAKE2$" followed by 64 hex characters.
+   *
+   * digest_buf : eight u32 digest words, only read
+   * line_buf   : output buffer; this function appends no terminator itself
+   * line_size  : not consulted
+   * returns    : number of bytes written to line_buf
+   */
+
+  const u32 *words = (const u32 *) digest_buf;
+
+  u8 *dst = (u8 *) line_buf;
+
+  // signature first
+
+  int pos = strlen (SIGNATURE_BLAKE2S);
+
+  memcpy (dst, SIGNATURE_BLAKE2S, pos);
+
+  // then the eight digest words, 8 hex characters each
+
+  for (int idx = 0; idx < 8; idx++)
+  {
+    u32_to_hex (words[idx], dst + pos);
+
+    pos += 8;
+  }
+
+  return pos;
+}
+
+void module_init (module_ctx_t * module_ctx)
+{
+  module_ctx->module_context_size = MODULE_CONTEXT_SIZE_CURRENT; // registration table: fills every callback slot of module_ctx for this hash mode
+  module_ctx->module_interface_version = MODULE_INTERFACE_VERSION_CURRENT;
+
+  module_ctx->module_attack_exec = module_attack_exec; // slots set to a function use the implementations defined earlier in this file
+  module_ctx->module_benchmark_esalt = MODULE_DEFAULT; // MODULE_DEFAULT: no module-specific hook (presumably the framework default applies - TODO confirm)
+  module_ctx->module_benchmark_hook_salt = MODULE_DEFAULT;
+  module_ctx->module_benchmark_mask = MODULE_DEFAULT;
+  module_ctx->module_benchmark_charset = MODULE_DEFAULT;
+  module_ctx->module_benchmark_salt = MODULE_DEFAULT;
+  module_ctx->module_build_plain_postprocess = MODULE_DEFAULT;
+  module_ctx->module_deep_comp_kernel = MODULE_DEFAULT;
+  module_ctx->module_deprecated_notice = MODULE_DEFAULT;
+  module_ctx->module_dgst_pos0 = module_dgst_pos0;
+  module_ctx->module_dgst_pos1 = module_dgst_pos1;
+  module_ctx->module_dgst_pos2 = module_dgst_pos2;
+  module_ctx->module_dgst_pos3 = module_dgst_pos3;
+  module_ctx->module_dgst_size = module_dgst_size;
+  module_ctx->module_dictstat_disable = MODULE_DEFAULT;
+  module_ctx->module_esalt_size = MODULE_DEFAULT;
+  module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
+  module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
+  module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
+  module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_count = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_parse = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_save = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_postprocess = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_potfile = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_zero_hash = MODULE_DEFAULT;
+  module_ctx->module_hash_decode = module_hash_decode; // text line -> digest words
+  module_ctx->module_hash_encode_status = MODULE_DEFAULT;
+  module_ctx->module_hash_encode_potfile = MODULE_DEFAULT;
+  module_ctx->module_hash_encode = module_hash_encode; // digest words -> text line
+  module_ctx->module_hash_init_selftest = MODULE_DEFAULT;
+  module_ctx->module_hash_mode = MODULE_DEFAULT;
+  module_ctx->module_hash_category = module_hash_category;
+  module_ctx->module_hash_name = module_hash_name;
+  module_ctx->module_hashes_count_min = MODULE_DEFAULT;
+  module_ctx->module_hashes_count_max = MODULE_DEFAULT;
+  module_ctx->module_hlfmt_disable = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_size = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_init = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_term = MODULE_DEFAULT;
+  module_ctx->module_hook12 = MODULE_DEFAULT;
+  module_ctx->module_hook23 = MODULE_DEFAULT;
+  module_ctx->module_hook_salt_size = MODULE_DEFAULT;
+  module_ctx->module_hook_size = MODULE_DEFAULT;
+  module_ctx->module_jit_build_options = MODULE_DEFAULT;
+  module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
+  module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
+  module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_max = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
+  module_ctx->module_kern_type = module_kern_type;
+  module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
+  module_ctx->module_opti_type = module_opti_type;
+  module_ctx->module_opts_type = module_opts_type;
+  module_ctx->module_outfile_check_disable = MODULE_DEFAULT;
+  module_ctx->module_outfile_check_nocomp = MODULE_DEFAULT;
+  module_ctx->module_potfile_custom_check = MODULE_DEFAULT;
+  module_ctx->module_potfile_disable = MODULE_DEFAULT;
+  module_ctx->module_potfile_keep_all_hashes = MODULE_DEFAULT;
+  module_ctx->module_pwdump_column = MODULE_DEFAULT;
+  module_ctx->module_pw_max = MODULE_DEFAULT; // no module-specific password length limits are registered
+  module_ctx->module_pw_min = MODULE_DEFAULT;
+  module_ctx->module_salt_max = MODULE_DEFAULT;
+  module_ctx->module_salt_min = MODULE_DEFAULT;
+  module_ctx->module_salt_type = module_salt_type;
+  module_ctx->module_separator = MODULE_DEFAULT;
+  module_ctx->module_st_hash = module_st_hash;
+  module_ctx->module_st_pass = module_st_pass;
+  module_ctx->module_tmp_size = MODULE_DEFAULT;
+  module_ctx->module_unstable_warning = MODULE_DEFAULT;
+  module_ctx->module_warmup_disable = MODULE_DEFAULT;
+}
diff --git a/tools/test_modules/m31000.pm b/tools/test_modules/m31000.pm
new file mode 100644
index 000000000..4c7ef55b2
--- /dev/null
+++ b/tools/test_modules/m31000.pm
@@ -0,0 +1,42 @@
+#!/usr/bin/env perl
+
+##
+## Author......: See docs/credits.txt
+## License.....: MIT
+##
+
+use strict;
+use warnings;
+
+use Crypt::Digest::BLAKE2s_256 qw (blake2s_256_hex);
+
+sub module_constraints { return [[0, 128], [-1, -1], [0, 64], [-1, -1], [-1, -1]]; } # five [min, max] pairs; [-1, -1] presumably marks "not applicable" - TODO confirm
+
+sub module_generate_hash
+{
+  my $word = shift; # plaintext to hash; returns the line "$BLAKE2$" . lowercase hex digest
+
+  my $digest = blake2s_256_hex ($word);
+
+  my $hash = sprintf ('$BLAKE2$%s', lc ($digest)); # digest is passed as an argument, never as part of the format string
+
+  return $hash;
+}
+
+sub module_verify_hash
+{
+  my $line = shift; # expected shape: "<hash>:<word>"; returns (recomputed hash, word) or nothing if a part is missing
+
+  my ($hash, $word) = split (':', $line, 2); # limit 2: a ':' inside the word survives and an empty word stays defined
+
+  return unless defined $hash;
+  return unless defined $word;
+
+  my $word_packed = pack_if_HEX_notation ($word);
+
+  my $new_hash = module_generate_hash ($word_packed);
+
+  return ($new_hash, $word);
+}
+
+1;