From 9c12459852d8c3a8407dac9ed09949fdc73ab885 Mon Sep 17 00:00:00 2001
From: jsteube <jens.steube@gmail.com>
Date: Thu, 13 Jul 2017 12:18:17 +0200
Subject: [PATCH] Add HMAC vector functions to inc_hash_*

---
 OpenCL/inc_hash_md4.cl       | 161 ++++++++++++++++++++++
 OpenCL/inc_hash_md5.cl       | 161 ++++++++++++++++++++++
 OpenCL/inc_hash_ripemd160.cl | 161 ++++++++++++++++++++++
 OpenCL/inc_hash_sha1.cl      | 161 ++++++++++++++++++++++
 OpenCL/inc_hash_sha256.cl    | 161 ++++++++++++++++++++++
 OpenCL/inc_hash_sha384.cl    | 255 ++++++++++++++++++++++++++++++++++-
 OpenCL/inc_hash_sha512.cl    | 253 ++++++++++++++++++++++++++++++++++
 OpenCL/inc_hash_whirlpool.cl | 161 ++++++++++++++++++++++
 8 files changed, 1473 insertions(+), 1 deletion(-)

diff --git a/OpenCL/inc_hash_md4.cl b/OpenCL/inc_hash_md4.cl
index 22ca9ba7b..bae8456fe 100644
--- a/OpenCL/inc_hash_md4.cl
+++ b/OpenCL/inc_hash_md4.cl
@@ -1357,3 +1357,164 @@ void md4_final_vector (md4_ctx_vector_t *ctx)
 
   md4_transform_vector (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h);
 }
+
+// HMAC + Vector
+
+typedef struct md4_hmac_ctx_vector // pair of MD4 vector states used by the md4_hmac_*_vector functions
+{
+  md4_ctx_vector_t ipad; // inner hash: seeded with key ^ 0x36.. by init, then fed the message data
+  md4_ctx_vector_t opad; // outer hash: seeded with key ^ 0x5c.. by init, fed the inner digest at final
+
+} md4_hmac_ctx_vector_t;
+
+void md4_hmac_init_vector_64 (md4_hmac_ctx_vector_t *ctx, const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4])
+{ // Initializes both HMAC states from a 64-byte key block passed as w0..w3 (16 words total); the inputs are only read.
+  u32x t0[4]; // t0..t3: scratch copy of the key block with the pad constant applied
+  u32x t1[4];
+  u32x t2[4];
+  u32x t3[4];
+
+  // ipad: XOR every key word with 0x36363636, then absorb the 64-byte result into a freshly initialized inner state
+
+  t0[0] = w0[0] ^ 0x36363636;
+  t0[1] = w0[1] ^ 0x36363636;
+  t0[2] = w0[2] ^ 0x36363636;
+  t0[3] = w0[3] ^ 0x36363636;
+  t1[0] = w1[0] ^ 0x36363636;
+  t1[1] = w1[1] ^ 0x36363636;
+  t1[2] = w1[2] ^ 0x36363636;
+  t1[3] = w1[3] ^ 0x36363636;
+  t2[0] = w2[0] ^ 0x36363636;
+  t2[1] = w2[1] ^ 0x36363636;
+  t2[2] = w2[2] ^ 0x36363636;
+  t2[3] = w2[3] ^ 0x36363636;
+  t3[0] = w3[0] ^ 0x36363636;
+  t3[1] = w3[1] ^ 0x36363636;
+  t3[2] = w3[2] ^ 0x36363636;
+  t3[3] = w3[3] ^ 0x36363636;
+
+  md4_init_vector (&ctx->ipad);
+
+  md4_update_vector_64 (&ctx->ipad, t0, t1, t2, t3, 64);
+
+  // opad: XOR every key word with 0x5c5c5c5c, then absorb the 64-byte result into a freshly initialized outer state
+
+  t0[0] = w0[0] ^ 0x5c5c5c5c;
+  t0[1] = w0[1] ^ 0x5c5c5c5c;
+  t0[2] = w0[2] ^ 0x5c5c5c5c;
+  t0[3] = w0[3] ^ 0x5c5c5c5c;
+  t1[0] = w1[0] ^ 0x5c5c5c5c;
+  t1[1] = w1[1] ^ 0x5c5c5c5c;
+  t1[2] = w1[2] ^ 0x5c5c5c5c;
+  t1[3] = w1[3] ^ 0x5c5c5c5c;
+  t2[0] = w2[0] ^ 0x5c5c5c5c;
+  t2[1] = w2[1] ^ 0x5c5c5c5c;
+  t2[2] = w2[2] ^ 0x5c5c5c5c;
+  t2[3] = w2[3] ^ 0x5c5c5c5c;
+  t3[0] = w3[0] ^ 0x5c5c5c5c;
+  t3[1] = w3[1] ^ 0x5c5c5c5c;
+  t3[2] = w3[2] ^ 0x5c5c5c5c;
+  t3[3] = w3[3] ^ 0x5c5c5c5c;
+
+  md4_init_vector (&ctx->opad);
+
+  md4_update_vector_64 (&ctx->opad, t0, t1, t2, t3, 64);
+}
+
+void md4_hmac_init_vector (md4_hmac_ctx_vector_t *ctx, const u32x *w, const int len)
+{ // Builds the 64-byte key block from key w of len bytes and passes it to md4_hmac_init_vector_64.
+  u32x w0[4]; // w0..w3: the 16-word key block handed to md4_hmac_init_vector_64
+  u32x w1[4];
+  u32x w2[4];
+  u32x w3[4];
+
+  if (len > 64) // key exceeds one 64-byte block: use its MD4 digest as the key instead
+  {
+    md4_ctx_vector_t tmp;
+
+    md4_init_vector (&tmp);
+
+    md4_update_vector (&tmp, w, len);
+
+    md4_final_vector (&tmp);
+
+    w0[0] = tmp.h[0]; // digest words tmp.h[0..3] form the key; the remaining 12 words are zero
+    w0[1] = tmp.h[1];
+    w0[2] = tmp.h[2];
+    w0[3] = tmp.h[3];
+    w1[0] = 0;
+    w1[1] = 0;
+    w1[2] = 0;
+    w1[3] = 0;
+    w2[0] = 0;
+    w2[1] = 0;
+    w2[2] = 0;
+    w2[3] = 0;
+    w3[0] = 0;
+    w3[1] = 0;
+    w3[2] = 0;
+    w3[3] = 0;
+  }
+  else // key fits in one block: take the first 16 words of w as-is
+  {
+    w0[0] = w[ 0]; // NOTE(review): all 16 words are read regardless of len - presumably w is zero-padded to 64 bytes; confirm against callers
+    w0[1] = w[ 1];
+    w0[2] = w[ 2];
+    w0[3] = w[ 3];
+    w1[0] = w[ 4];
+    w1[1] = w[ 5];
+    w1[2] = w[ 6];
+    w1[3] = w[ 7];
+    w2[0] = w[ 8];
+    w2[1] = w[ 9];
+    w2[2] = w[10];
+    w2[3] = w[11];
+    w3[0] = w[12];
+    w3[1] = w[13];
+    w3[2] = w[14];
+    w3[3] = w[15];
+  }
+
+  md4_hmac_init_vector_64 (ctx, w0, w1, w2, w3);
+}
+
+void md4_hmac_update_vector_64 (md4_hmac_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const int len)
+{ // Feeds message data (w0..w3, len) into the inner (ipad) state only; the outer state is not touched here.
+  md4_update_vector_64 (&ctx->ipad, w0, w1, w2, w3, len);
+}
+
+void md4_hmac_update_vector (md4_hmac_ctx_vector_t *ctx, const u32x *w, const int len)
+{ // Feeds message data (w, len) into the inner (ipad) state only; the outer state is not touched here.
+  md4_update_vector (&ctx->ipad, w, len);
+}
+
+void md4_hmac_final_vector (md4_hmac_ctx_vector_t *ctx)
+{ // Completes the HMAC: finalizes the inner hash, then runs its digest through the outer state and finalizes that.
+  md4_final_vector (&ctx->ipad);
+
+  u32x t0[4]; // t0..t3: block holding the inner digest followed by zero words
+  u32x t1[4];
+  u32x t2[4];
+  u32x t3[4];
+
+  t0[0] = ctx->ipad.h[0];
+  t0[1] = ctx->ipad.h[1];
+  t0[2] = ctx->ipad.h[2];
+  t0[3] = ctx->ipad.h[3];
+  t1[0] = 0;
+  t1[1] = 0;
+  t1[2] = 0;
+  t1[3] = 0;
+  t2[0] = 0;
+  t2[1] = 0;
+  t2[2] = 0;
+  t2[3] = 0;
+  t3[0] = 0;
+  t3[1] = 0;
+  t3[2] = 0;
+  t3[3] = 0;
+
+  md4_update_vector_64 (&ctx->opad, t0, t1, t2, t3, 16); // 16 = 4 digest words * 4 bytes
+
+  md4_final_vector (&ctx->opad); // NOTE(review): the HMAC result is presumably read from ctx->opad.h afterwards - confirm
+}
diff --git a/OpenCL/inc_hash_md5.cl b/OpenCL/inc_hash_md5.cl
index 9c897c093..379af9488 100644
--- a/OpenCL/inc_hash_md5.cl
+++ b/OpenCL/inc_hash_md5.cl
@@ -1425,3 +1425,164 @@ void md5_final_vector (md5_ctx_vector_t *ctx)
 
   md5_transform_vector (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h);
 }
+
+// HMAC + Vector
+
+typedef struct md5_hmac_ctx_vector // pair of MD5 vector states used by the md5_hmac_*_vector functions
+{
+  md5_ctx_vector_t ipad; // inner hash: seeded with key ^ 0x36.. by init, then fed the message data
+  md5_ctx_vector_t opad; // outer hash: seeded with key ^ 0x5c.. by init, fed the inner digest at final
+
+} md5_hmac_ctx_vector_t;
+
+void md5_hmac_init_vector_64 (md5_hmac_ctx_vector_t *ctx, const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4])
+{ // Initializes both HMAC states from a 64-byte key block passed as w0..w3 (16 words total); the inputs are only read.
+  u32x t0[4]; // t0..t3: scratch copy of the key block with the pad constant applied
+  u32x t1[4];
+  u32x t2[4];
+  u32x t3[4];
+
+  // ipad: XOR every key word with 0x36363636, then absorb the 64-byte result into a freshly initialized inner state
+
+  t0[0] = w0[0] ^ 0x36363636;
+  t0[1] = w0[1] ^ 0x36363636;
+  t0[2] = w0[2] ^ 0x36363636;
+  t0[3] = w0[3] ^ 0x36363636;
+  t1[0] = w1[0] ^ 0x36363636;
+  t1[1] = w1[1] ^ 0x36363636;
+  t1[2] = w1[2] ^ 0x36363636;
+  t1[3] = w1[3] ^ 0x36363636;
+  t2[0] = w2[0] ^ 0x36363636;
+  t2[1] = w2[1] ^ 0x36363636;
+  t2[2] = w2[2] ^ 0x36363636;
+  t2[3] = w2[3] ^ 0x36363636;
+  t3[0] = w3[0] ^ 0x36363636;
+  t3[1] = w3[1] ^ 0x36363636;
+  t3[2] = w3[2] ^ 0x36363636;
+  t3[3] = w3[3] ^ 0x36363636;
+
+  md5_init_vector (&ctx->ipad);
+
+  md5_update_vector_64 (&ctx->ipad, t0, t1, t2, t3, 64);
+
+  // opad: XOR every key word with 0x5c5c5c5c, then absorb the 64-byte result into a freshly initialized outer state
+
+  t0[0] = w0[0] ^ 0x5c5c5c5c;
+  t0[1] = w0[1] ^ 0x5c5c5c5c;
+  t0[2] = w0[2] ^ 0x5c5c5c5c;
+  t0[3] = w0[3] ^ 0x5c5c5c5c;
+  t1[0] = w1[0] ^ 0x5c5c5c5c;
+  t1[1] = w1[1] ^ 0x5c5c5c5c;
+  t1[2] = w1[2] ^ 0x5c5c5c5c;
+  t1[3] = w1[3] ^ 0x5c5c5c5c;
+  t2[0] = w2[0] ^ 0x5c5c5c5c;
+  t2[1] = w2[1] ^ 0x5c5c5c5c;
+  t2[2] = w2[2] ^ 0x5c5c5c5c;
+  t2[3] = w2[3] ^ 0x5c5c5c5c;
+  t3[0] = w3[0] ^ 0x5c5c5c5c;
+  t3[1] = w3[1] ^ 0x5c5c5c5c;
+  t3[2] = w3[2] ^ 0x5c5c5c5c;
+  t3[3] = w3[3] ^ 0x5c5c5c5c;
+
+  md5_init_vector (&ctx->opad);
+
+  md5_update_vector_64 (&ctx->opad, t0, t1, t2, t3, 64);
+}
+
+void md5_hmac_init_vector (md5_hmac_ctx_vector_t *ctx, const u32x *w, const int len)
+{ // Builds the 64-byte key block from key w of len bytes and passes it to md5_hmac_init_vector_64.
+  u32x w0[4]; // w0..w3: the 16-word key block handed to md5_hmac_init_vector_64
+  u32x w1[4];
+  u32x w2[4];
+  u32x w3[4];
+
+  if (len > 64) // key exceeds one 64-byte block: use its MD5 digest as the key instead
+  {
+    md5_ctx_vector_t tmp;
+
+    md5_init_vector (&tmp);
+
+    md5_update_vector (&tmp, w, len);
+
+    md5_final_vector (&tmp);
+
+    w0[0] = tmp.h[0]; // digest words tmp.h[0..3] form the key; the remaining 12 words are zero
+    w0[1] = tmp.h[1];
+    w0[2] = tmp.h[2];
+    w0[3] = tmp.h[3];
+    w1[0] = 0;
+    w1[1] = 0;
+    w1[2] = 0;
+    w1[3] = 0;
+    w2[0] = 0;
+    w2[1] = 0;
+    w2[2] = 0;
+    w2[3] = 0;
+    w3[0] = 0;
+    w3[1] = 0;
+    w3[2] = 0;
+    w3[3] = 0;
+  }
+  else // key fits in one block: take the first 16 words of w as-is
+  {
+    w0[0] = w[ 0]; // NOTE(review): all 16 words are read regardless of len - presumably w is zero-padded to 64 bytes; confirm against callers
+    w0[1] = w[ 1];
+    w0[2] = w[ 2];
+    w0[3] = w[ 3];
+    w1[0] = w[ 4];
+    w1[1] = w[ 5];
+    w1[2] = w[ 6];
+    w1[3] = w[ 7];
+    w2[0] = w[ 8];
+    w2[1] = w[ 9];
+    w2[2] = w[10];
+    w2[3] = w[11];
+    w3[0] = w[12];
+    w3[1] = w[13];
+    w3[2] = w[14];
+    w3[3] = w[15];
+  }
+
+  md5_hmac_init_vector_64 (ctx, w0, w1, w2, w3);
+}
+
+void md5_hmac_update_vector_64 (md5_hmac_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const int len)
+{ // Feeds message data (w0..w3, len) into the inner (ipad) state only; the outer state is not touched here.
+  md5_update_vector_64 (&ctx->ipad, w0, w1, w2, w3, len);
+}
+
+void md5_hmac_update_vector (md5_hmac_ctx_vector_t *ctx, const u32x *w, const int len)
+{ // Feeds message data (w, len) into the inner (ipad) state only; the outer state is not touched here.
+  md5_update_vector (&ctx->ipad, w, len);
+}
+
+void md5_hmac_final_vector (md5_hmac_ctx_vector_t *ctx)
+{ // Completes the HMAC: finalizes the inner hash, then runs its digest through the outer state and finalizes that.
+  md5_final_vector (&ctx->ipad);
+
+  u32x t0[4]; // t0..t3: block holding the inner digest followed by zero words
+  u32x t1[4];
+  u32x t2[4];
+  u32x t3[4];
+
+  t0[0] = ctx->ipad.h[0];
+  t0[1] = ctx->ipad.h[1];
+  t0[2] = ctx->ipad.h[2];
+  t0[3] = ctx->ipad.h[3];
+  t1[0] = 0;
+  t1[1] = 0;
+  t1[2] = 0;
+  t1[3] = 0;
+  t2[0] = 0;
+  t2[1] = 0;
+  t2[2] = 0;
+  t2[3] = 0;
+  t3[0] = 0;
+  t3[1] = 0;
+  t3[2] = 0;
+  t3[3] = 0;
+
+  md5_update_vector_64 (&ctx->opad, t0, t1, t2, t3, 16); // 16 = 4 digest words * 4 bytes
+
+  md5_final_vector (&ctx->opad); // NOTE(review): the HMAC result is presumably read from ctx->opad.h afterwards - confirm
+}
diff --git a/OpenCL/inc_hash_ripemd160.cl b/OpenCL/inc_hash_ripemd160.cl
index 9519f5961..1f896063d 100644
--- a/OpenCL/inc_hash_ripemd160.cl
+++ b/OpenCL/inc_hash_ripemd160.cl
@@ -1626,3 +1626,164 @@ void ripemd160_final_vector (ripemd160_ctx_vector_t *ctx)
 
   ripemd160_transform_vector (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h);
 }
+
+// HMAC + Vector
+
+typedef struct ripemd160_hmac_ctx_vector // pair of RIPEMD-160 vector states used by the ripemd160_hmac_*_vector functions
+{
+  ripemd160_ctx_vector_t ipad; // inner hash: seeded with key ^ 0x36.. by init, then fed the message data
+  ripemd160_ctx_vector_t opad; // outer hash: seeded with key ^ 0x5c.. by init, fed the inner digest at final
+
+} ripemd160_hmac_ctx_vector_t;
+
+void ripemd160_hmac_init_vector_64 (ripemd160_hmac_ctx_vector_t *ctx, const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4])
+{ // Initializes both HMAC states from a 64-byte key block passed as w0..w3 (16 words total); the inputs are only read.
+  u32x t0[4]; // t0..t3: scratch copy of the key block with the pad constant applied
+  u32x t1[4];
+  u32x t2[4];
+  u32x t3[4];
+
+  // ipad: XOR every key word with 0x36363636, then absorb the 64-byte result into a freshly initialized inner state
+
+  t0[0] = w0[0] ^ 0x36363636;
+  t0[1] = w0[1] ^ 0x36363636;
+  t0[2] = w0[2] ^ 0x36363636;
+  t0[3] = w0[3] ^ 0x36363636;
+  t1[0] = w1[0] ^ 0x36363636;
+  t1[1] = w1[1] ^ 0x36363636;
+  t1[2] = w1[2] ^ 0x36363636;
+  t1[3] = w1[3] ^ 0x36363636;
+  t2[0] = w2[0] ^ 0x36363636;
+  t2[1] = w2[1] ^ 0x36363636;
+  t2[2] = w2[2] ^ 0x36363636;
+  t2[3] = w2[3] ^ 0x36363636;
+  t3[0] = w3[0] ^ 0x36363636;
+  t3[1] = w3[1] ^ 0x36363636;
+  t3[2] = w3[2] ^ 0x36363636;
+  t3[3] = w3[3] ^ 0x36363636;
+
+  ripemd160_init_vector (&ctx->ipad);
+
+  ripemd160_update_vector_64 (&ctx->ipad, t0, t1, t2, t3, 64);
+
+  // opad: XOR every key word with 0x5c5c5c5c, then absorb the 64-byte result into a freshly initialized outer state
+
+  t0[0] = w0[0] ^ 0x5c5c5c5c;
+  t0[1] = w0[1] ^ 0x5c5c5c5c;
+  t0[2] = w0[2] ^ 0x5c5c5c5c;
+  t0[3] = w0[3] ^ 0x5c5c5c5c;
+  t1[0] = w1[0] ^ 0x5c5c5c5c;
+  t1[1] = w1[1] ^ 0x5c5c5c5c;
+  t1[2] = w1[2] ^ 0x5c5c5c5c;
+  t1[3] = w1[3] ^ 0x5c5c5c5c;
+  t2[0] = w2[0] ^ 0x5c5c5c5c;
+  t2[1] = w2[1] ^ 0x5c5c5c5c;
+  t2[2] = w2[2] ^ 0x5c5c5c5c;
+  t2[3] = w2[3] ^ 0x5c5c5c5c;
+  t3[0] = w3[0] ^ 0x5c5c5c5c;
+  t3[1] = w3[1] ^ 0x5c5c5c5c;
+  t3[2] = w3[2] ^ 0x5c5c5c5c;
+  t3[3] = w3[3] ^ 0x5c5c5c5c;
+
+  ripemd160_init_vector (&ctx->opad);
+
+  ripemd160_update_vector_64 (&ctx->opad, t0, t1, t2, t3, 64);
+}
+
+void ripemd160_hmac_init_vector (ripemd160_hmac_ctx_vector_t *ctx, const u32x *w, const int len)
+{ // Builds the 64-byte key block from key w of len bytes and passes it to ripemd160_hmac_init_vector_64.
+  u32x w0[4]; // w0..w3: the 16-word key block handed to ripemd160_hmac_init_vector_64
+  u32x w1[4];
+  u32x w2[4];
+  u32x w3[4];
+
+  if (len > 64) // key exceeds one 64-byte block: use its RIPEMD-160 digest as the key instead
+  {
+    ripemd160_ctx_vector_t tmp;
+
+    ripemd160_init_vector (&tmp);
+
+    ripemd160_update_vector (&tmp, w, len);
+
+    ripemd160_final_vector (&tmp);
+
+    w0[0] = tmp.h[0]; // digest words tmp.h[0..4] form the key; the remaining 11 words are zero
+    w0[1] = tmp.h[1];
+    w0[2] = tmp.h[2];
+    w0[3] = tmp.h[3];
+    w1[0] = tmp.h[4];
+    w1[1] = 0;
+    w1[2] = 0;
+    w1[3] = 0;
+    w2[0] = 0;
+    w2[1] = 0;
+    w2[2] = 0;
+    w2[3] = 0;
+    w3[0] = 0;
+    w3[1] = 0;
+    w3[2] = 0;
+    w3[3] = 0;
+  }
+  else // key fits in one block: take the first 16 words of w as-is
+  {
+    w0[0] = w[ 0]; // NOTE(review): all 16 words are read regardless of len - presumably w is zero-padded to 64 bytes; confirm against callers
+    w0[1] = w[ 1];
+    w0[2] = w[ 2];
+    w0[3] = w[ 3];
+    w1[0] = w[ 4];
+    w1[1] = w[ 5];
+    w1[2] = w[ 6];
+    w1[3] = w[ 7];
+    w2[0] = w[ 8];
+    w2[1] = w[ 9];
+    w2[2] = w[10];
+    w2[3] = w[11];
+    w3[0] = w[12];
+    w3[1] = w[13];
+    w3[2] = w[14];
+    w3[3] = w[15];
+  }
+
+  ripemd160_hmac_init_vector_64 (ctx, w0, w1, w2, w3);
+}
+
+void ripemd160_hmac_update_vector_64 (ripemd160_hmac_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const int len)
+{ // Feeds message data (w0..w3, len) into the inner (ipad) state only; the outer state is not touched here.
+  ripemd160_update_vector_64 (&ctx->ipad, w0, w1, w2, w3, len);
+}
+
+void ripemd160_hmac_update_vector (ripemd160_hmac_ctx_vector_t *ctx, const u32x *w, const int len)
+{ // Feeds message data (w, len) into the inner (ipad) state only; the outer state is not touched here.
+  ripemd160_update_vector (&ctx->ipad, w, len);
+}
+
+void ripemd160_hmac_final_vector (ripemd160_hmac_ctx_vector_t *ctx)
+{ // Completes the HMAC: finalizes the inner hash, then runs its digest through the outer state and finalizes that.
+  ripemd160_final_vector (&ctx->ipad);
+
+  u32x t0[4]; // t0..t3: block holding the inner digest followed by zero words
+  u32x t1[4];
+  u32x t2[4];
+  u32x t3[4];
+
+  t0[0] = ctx->ipad.h[0];
+  t0[1] = ctx->ipad.h[1];
+  t0[2] = ctx->ipad.h[2];
+  t0[3] = ctx->ipad.h[3];
+  t1[0] = ctx->ipad.h[4];
+  t1[1] = 0;
+  t1[2] = 0;
+  t1[3] = 0;
+  t2[0] = 0;
+  t2[1] = 0;
+  t2[2] = 0;
+  t2[3] = 0;
+  t3[0] = 0;
+  t3[1] = 0;
+  t3[2] = 0;
+  t3[3] = 0;
+
+  ripemd160_update_vector_64 (&ctx->opad, t0, t1, t2, t3, 20); // 20 = 5 digest words * 4 bytes
+
+  ripemd160_final_vector (&ctx->opad); // NOTE(review): the HMAC result is presumably read from ctx->opad.h afterwards - confirm
+}
diff --git a/OpenCL/inc_hash_sha1.cl b/OpenCL/inc_hash_sha1.cl
index b8d85b8f3..58089e266 100644
--- a/OpenCL/inc_hash_sha1.cl
+++ b/OpenCL/inc_hash_sha1.cl
@@ -1495,3 +1495,164 @@ void sha1_final_vector (sha1_ctx_vector_t *ctx)
 
   sha1_transform_vector (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h);
 }
+
+// HMAC + Vector
+
+typedef struct sha1_hmac_ctx_vector // pair of SHA-1 vector states used by the sha1_hmac_*_vector functions
+{
+  sha1_ctx_vector_t ipad; // inner hash: seeded with key ^ 0x36.. by init, then fed the message data
+  sha1_ctx_vector_t opad; // outer hash: seeded with key ^ 0x5c.. by init, fed the inner digest at final
+
+} sha1_hmac_ctx_vector_t;
+
+void sha1_hmac_init_vector_64 (sha1_hmac_ctx_vector_t *ctx, const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4])
+{ // Initializes both HMAC states from a 64-byte key block passed as w0..w3 (16 words total); the inputs are only read.
+  u32x t0[4]; // t0..t3: scratch copy of the key block with the pad constant applied
+  u32x t1[4];
+  u32x t2[4];
+  u32x t3[4];
+
+  // ipad: XOR every key word with 0x36363636, then absorb the 64-byte result into a freshly initialized inner state
+
+  t0[0] = w0[0] ^ 0x36363636;
+  t0[1] = w0[1] ^ 0x36363636;
+  t0[2] = w0[2] ^ 0x36363636;
+  t0[3] = w0[3] ^ 0x36363636;
+  t1[0] = w1[0] ^ 0x36363636;
+  t1[1] = w1[1] ^ 0x36363636;
+  t1[2] = w1[2] ^ 0x36363636;
+  t1[3] = w1[3] ^ 0x36363636;
+  t2[0] = w2[0] ^ 0x36363636;
+  t2[1] = w2[1] ^ 0x36363636;
+  t2[2] = w2[2] ^ 0x36363636;
+  t2[3] = w2[3] ^ 0x36363636;
+  t3[0] = w3[0] ^ 0x36363636;
+  t3[1] = w3[1] ^ 0x36363636;
+  t3[2] = w3[2] ^ 0x36363636;
+  t3[3] = w3[3] ^ 0x36363636;
+
+  sha1_init_vector (&ctx->ipad);
+
+  sha1_update_vector_64 (&ctx->ipad, t0, t1, t2, t3, 64);
+
+  // opad: XOR every key word with 0x5c5c5c5c, then absorb the 64-byte result into a freshly initialized outer state
+
+  t0[0] = w0[0] ^ 0x5c5c5c5c;
+  t0[1] = w0[1] ^ 0x5c5c5c5c;
+  t0[2] = w0[2] ^ 0x5c5c5c5c;
+  t0[3] = w0[3] ^ 0x5c5c5c5c;
+  t1[0] = w1[0] ^ 0x5c5c5c5c;
+  t1[1] = w1[1] ^ 0x5c5c5c5c;
+  t1[2] = w1[2] ^ 0x5c5c5c5c;
+  t1[3] = w1[3] ^ 0x5c5c5c5c;
+  t2[0] = w2[0] ^ 0x5c5c5c5c;
+  t2[1] = w2[1] ^ 0x5c5c5c5c;
+  t2[2] = w2[2] ^ 0x5c5c5c5c;
+  t2[3] = w2[3] ^ 0x5c5c5c5c;
+  t3[0] = w3[0] ^ 0x5c5c5c5c;
+  t3[1] = w3[1] ^ 0x5c5c5c5c;
+  t3[2] = w3[2] ^ 0x5c5c5c5c;
+  t3[3] = w3[3] ^ 0x5c5c5c5c;
+
+  sha1_init_vector (&ctx->opad);
+
+  sha1_update_vector_64 (&ctx->opad, t0, t1, t2, t3, 64);
+}
+
+void sha1_hmac_init_vector (sha1_hmac_ctx_vector_t *ctx, const u32x *w, const int len)
+{ // Builds the 64-byte key block from key w of len bytes and passes it to sha1_hmac_init_vector_64.
+  u32x w0[4]; // w0..w3: the 16-word key block handed to sha1_hmac_init_vector_64
+  u32x w1[4];
+  u32x w2[4];
+  u32x w3[4];
+
+  if (len > 64) // key exceeds one 64-byte block: use its SHA-1 digest as the key instead
+  {
+    sha1_ctx_vector_t tmp;
+
+    sha1_init_vector (&tmp);
+
+    sha1_update_vector (&tmp, w, len);
+
+    sha1_final_vector (&tmp);
+
+    w0[0] = tmp.h[0]; // digest words tmp.h[0..4] form the key; the remaining 11 words are zero
+    w0[1] = tmp.h[1];
+    w0[2] = tmp.h[2];
+    w0[3] = tmp.h[3];
+    w1[0] = tmp.h[4];
+    w1[1] = 0;
+    w1[2] = 0;
+    w1[3] = 0;
+    w2[0] = 0;
+    w2[1] = 0;
+    w2[2] = 0;
+    w2[3] = 0;
+    w3[0] = 0;
+    w3[1] = 0;
+    w3[2] = 0;
+    w3[3] = 0;
+  }
+  else // key fits in one block: take the first 16 words of w as-is
+  {
+    w0[0] = w[ 0]; // NOTE(review): all 16 words are read regardless of len - presumably w is zero-padded to 64 bytes; confirm against callers
+    w0[1] = w[ 1];
+    w0[2] = w[ 2];
+    w0[3] = w[ 3];
+    w1[0] = w[ 4];
+    w1[1] = w[ 5];
+    w1[2] = w[ 6];
+    w1[3] = w[ 7];
+    w2[0] = w[ 8];
+    w2[1] = w[ 9];
+    w2[2] = w[10];
+    w2[3] = w[11];
+    w3[0] = w[12];
+    w3[1] = w[13];
+    w3[2] = w[14];
+    w3[3] = w[15];
+  }
+
+  sha1_hmac_init_vector_64 (ctx, w0, w1, w2, w3);
+}
+
+void sha1_hmac_update_vector_64 (sha1_hmac_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const int len)
+{ // Feeds message data (w0..w3, len) into the inner (ipad) state only; the outer state is not touched here.
+  sha1_update_vector_64 (&ctx->ipad, w0, w1, w2, w3, len);
+}
+
+void sha1_hmac_update_vector (sha1_hmac_ctx_vector_t *ctx, const u32x *w, const int len)
+{ // Feeds message data (w, len) into the inner (ipad) state only; the outer state is not touched here.
+  sha1_update_vector (&ctx->ipad, w, len);
+}
+
+void sha1_hmac_final_vector (sha1_hmac_ctx_vector_t *ctx)
+{ // Completes the HMAC: finalizes the inner hash, then runs its digest through the outer state and finalizes that.
+  sha1_final_vector (&ctx->ipad);
+
+  u32x t0[4]; // t0..t3: block holding the inner digest followed by zero words
+  u32x t1[4];
+  u32x t2[4];
+  u32x t3[4];
+
+  t0[0] = ctx->ipad.h[0];
+  t0[1] = ctx->ipad.h[1];
+  t0[2] = ctx->ipad.h[2];
+  t0[3] = ctx->ipad.h[3];
+  t1[0] = ctx->ipad.h[4];
+  t1[1] = 0;
+  t1[2] = 0;
+  t1[3] = 0;
+  t2[0] = 0;
+  t2[1] = 0;
+  t2[2] = 0;
+  t2[3] = 0;
+  t3[0] = 0;
+  t3[1] = 0;
+  t3[2] = 0;
+  t3[3] = 0;
+
+  sha1_update_vector_64 (&ctx->opad, t0, t1, t2, t3, 20); // 20 = 5 digest words * 4 bytes
+
+  sha1_final_vector (&ctx->opad); // NOTE(review): the HMAC result is presumably read from ctx->opad.h afterwards - confirm
+}
diff --git a/OpenCL/inc_hash_sha256.cl b/OpenCL/inc_hash_sha256.cl
index afbdbc3b5..f0bb9ba33 100644
--- a/OpenCL/inc_hash_sha256.cl
+++ b/OpenCL/inc_hash_sha256.cl
@@ -1448,3 +1448,164 @@ void sha256_final_vector (sha256_ctx_vector_t *ctx)
 
   sha256_transform_vector (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h);
 }
+
+// HMAC + Vector
+
+typedef struct sha256_hmac_ctx_vector // pair of SHA-256 vector states used by the sha256_hmac_*_vector functions
+{
+  sha256_ctx_vector_t ipad; // inner hash: seeded with key ^ 0x36.. by init, then fed the message data
+  sha256_ctx_vector_t opad; // outer hash: seeded with key ^ 0x5c.. by init, fed the inner digest at final
+
+} sha256_hmac_ctx_vector_t;
+
+void sha256_hmac_init_vector_64 (sha256_hmac_ctx_vector_t *ctx, const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4])
+{ // Initializes both HMAC states from a 64-byte key block passed as w0..w3 (16 words total); the inputs are only read.
+  u32x t0[4]; // t0..t3: scratch copy of the key block with the pad constant applied
+  u32x t1[4];
+  u32x t2[4];
+  u32x t3[4];
+
+  // ipad: XOR every key word with 0x36363636, then absorb the 64-byte result into a freshly initialized inner state
+
+  t0[0] = w0[0] ^ 0x36363636;
+  t0[1] = w0[1] ^ 0x36363636;
+  t0[2] = w0[2] ^ 0x36363636;
+  t0[3] = w0[3] ^ 0x36363636;
+  t1[0] = w1[0] ^ 0x36363636;
+  t1[1] = w1[1] ^ 0x36363636;
+  t1[2] = w1[2] ^ 0x36363636;
+  t1[3] = w1[3] ^ 0x36363636;
+  t2[0] = w2[0] ^ 0x36363636;
+  t2[1] = w2[1] ^ 0x36363636;
+  t2[2] = w2[2] ^ 0x36363636;
+  t2[3] = w2[3] ^ 0x36363636;
+  t3[0] = w3[0] ^ 0x36363636;
+  t3[1] = w3[1] ^ 0x36363636;
+  t3[2] = w3[2] ^ 0x36363636;
+  t3[3] = w3[3] ^ 0x36363636;
+
+  sha256_init_vector (&ctx->ipad);
+
+  sha256_update_vector_64 (&ctx->ipad, t0, t1, t2, t3, 64);
+
+  // opad: XOR every key word with 0x5c5c5c5c, then absorb the 64-byte result into a freshly initialized outer state
+
+  t0[0] = w0[0] ^ 0x5c5c5c5c;
+  t0[1] = w0[1] ^ 0x5c5c5c5c;
+  t0[2] = w0[2] ^ 0x5c5c5c5c;
+  t0[3] = w0[3] ^ 0x5c5c5c5c;
+  t1[0] = w1[0] ^ 0x5c5c5c5c;
+  t1[1] = w1[1] ^ 0x5c5c5c5c;
+  t1[2] = w1[2] ^ 0x5c5c5c5c;
+  t1[3] = w1[3] ^ 0x5c5c5c5c;
+  t2[0] = w2[0] ^ 0x5c5c5c5c;
+  t2[1] = w2[1] ^ 0x5c5c5c5c;
+  t2[2] = w2[2] ^ 0x5c5c5c5c;
+  t2[3] = w2[3] ^ 0x5c5c5c5c;
+  t3[0] = w3[0] ^ 0x5c5c5c5c;
+  t3[1] = w3[1] ^ 0x5c5c5c5c;
+  t3[2] = w3[2] ^ 0x5c5c5c5c;
+  t3[3] = w3[3] ^ 0x5c5c5c5c;
+
+  sha256_init_vector (&ctx->opad);
+
+  sha256_update_vector_64 (&ctx->opad, t0, t1, t2, t3, 64);
+}
+
+void sha256_hmac_init_vector (sha256_hmac_ctx_vector_t *ctx, const u32x *w, const int len)
+{ // Builds the 64-byte key block from key w of len bytes and passes it to sha256_hmac_init_vector_64.
+  u32x w0[4]; // w0..w3: the 16-word key block handed to sha256_hmac_init_vector_64
+  u32x w1[4];
+  u32x w2[4];
+  u32x w3[4];
+
+  if (len > 64) // key exceeds one 64-byte block: use its SHA-256 digest as the key instead
+  {
+    sha256_ctx_vector_t tmp;
+
+    sha256_init_vector (&tmp);
+
+    sha256_update_vector (&tmp, w, len);
+
+    sha256_final_vector (&tmp);
+
+    w0[0] = tmp.h[0]; // digest words tmp.h[0..7] form the key; the remaining 8 words are zero
+    w0[1] = tmp.h[1];
+    w0[2] = tmp.h[2];
+    w0[3] = tmp.h[3];
+    w1[0] = tmp.h[4];
+    w1[1] = tmp.h[5];
+    w1[2] = tmp.h[6];
+    w1[3] = tmp.h[7];
+    w2[0] = 0;
+    w2[1] = 0;
+    w2[2] = 0;
+    w2[3] = 0;
+    w3[0] = 0;
+    w3[1] = 0;
+    w3[2] = 0;
+    w3[3] = 0;
+  }
+  else // key fits in one block: take the first 16 words of w as-is
+  {
+    w0[0] = w[ 0]; // NOTE(review): all 16 words are read regardless of len - presumably w is zero-padded to 64 bytes; confirm against callers
+    w0[1] = w[ 1];
+    w0[2] = w[ 2];
+    w0[3] = w[ 3];
+    w1[0] = w[ 4];
+    w1[1] = w[ 5];
+    w1[2] = w[ 6];
+    w1[3] = w[ 7];
+    w2[0] = w[ 8];
+    w2[1] = w[ 9];
+    w2[2] = w[10];
+    w2[3] = w[11];
+    w3[0] = w[12];
+    w3[1] = w[13];
+    w3[2] = w[14];
+    w3[3] = w[15];
+  }
+
+  sha256_hmac_init_vector_64 (ctx, w0, w1, w2, w3);
+}
+
+void sha256_hmac_update_vector_64 (sha256_hmac_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const int len)
+{ // Feeds message data (w0..w3, len) into the inner (ipad) state only; the outer state is not touched here.
+  sha256_update_vector_64 (&ctx->ipad, w0, w1, w2, w3, len);
+}
+
+void sha256_hmac_update_vector (sha256_hmac_ctx_vector_t *ctx, const u32x *w, const int len)
+{ // Feeds message data (w, len) into the inner (ipad) state only; the outer state is not touched here.
+  sha256_update_vector (&ctx->ipad, w, len);
+}
+
+void sha256_hmac_final_vector (sha256_hmac_ctx_vector_t *ctx)
+{ // Completes the HMAC: finalizes the inner hash, then runs its digest through the outer state and finalizes that.
+  sha256_final_vector (&ctx->ipad);
+
+  u32x t0[4]; // t0..t3: block holding the inner digest followed by zero words
+  u32x t1[4];
+  u32x t2[4];
+  u32x t3[4];
+
+  t0[0] = ctx->ipad.h[0];
+  t0[1] = ctx->ipad.h[1];
+  t0[2] = ctx->ipad.h[2];
+  t0[3] = ctx->ipad.h[3];
+  t1[0] = ctx->ipad.h[4];
+  t1[1] = ctx->ipad.h[5];
+  t1[2] = ctx->ipad.h[6];
+  t1[3] = ctx->ipad.h[7];
+  t2[0] = 0;
+  t2[1] = 0;
+  t2[2] = 0;
+  t2[3] = 0;
+  t3[0] = 0;
+  t3[1] = 0;
+  t3[2] = 0;
+  t3[3] = 0;
+
+  sha256_update_vector_64 (&ctx->opad, t0, t1, t2, t3, 32); // 32 = 8 digest words * 4 bytes
+
+  sha256_final_vector (&ctx->opad); // NOTE(review): the HMAC result is presumably read from ctx->opad.h afterwards - confirm
+}
diff --git a/OpenCL/inc_hash_sha384.cl b/OpenCL/inc_hash_sha384.cl
index 84c961ba1..f4fcddd3f 100644
--- a/OpenCL/inc_hash_sha384.cl
+++ b/OpenCL/inc_hash_sha384.cl
@@ -1693,7 +1693,7 @@ void sha384_hmac_final (sha384_hmac_ctx_t *ctx)
   t7[2] = 0;
   t7[3] = 0;
 
-  sha384_update_128 (&ctx->opad, t0, t1, t2, t3, t4, t5, t6, t7, 64);
+  sha384_update_128 (&ctx->opad, t0, t1, t2, t3, t4, t5, t6, t7, 48);
 
   sha384_final (&ctx->opad);
 }
@@ -2231,3 +2231,256 @@ void sha384_final_vector (sha384_ctx_vector_t *ctx)
 
   sha384_transform_vector (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, ctx->h);
 }
+
+// HMAC + Vector
+
+typedef struct sha384_hmac_ctx_vector // pair of SHA-384 vector states used by the sha384_hmac_*_vector functions
+{
+  sha384_ctx_vector_t ipad; // inner hash: seeded with key ^ 0x36.. by init, then fed the message data
+  sha384_ctx_vector_t opad; // outer hash: seeded with key ^ 0x5c.. by init, fed the inner digest at final
+
+} sha384_hmac_ctx_vector_t;
+
+void sha384_hmac_init_vector_128 (sha384_hmac_ctx_vector_t *ctx, const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], const u32x w4[4], const u32x w5[4], const u32x w6[4], const u32x w7[4])
+{ // Initializes both HMAC states from a 128-byte key block passed as w0..w7 (32 words total); the inputs are only read.
+  u32x t0[4]; // t0..t7: scratch copy of the key block with the pad constant applied
+  u32x t1[4];
+  u32x t2[4];
+  u32x t3[4];
+  u32x t4[4];
+  u32x t5[4];
+  u32x t6[4];
+  u32x t7[4];
+
+  // ipad: XOR every key word with 0x36363636, then absorb the 128-byte result into a freshly initialized inner state
+
+  t0[0] = w0[0] ^ 0x36363636;
+  t0[1] = w0[1] ^ 0x36363636;
+  t0[2] = w0[2] ^ 0x36363636;
+  t0[3] = w0[3] ^ 0x36363636;
+  t1[0] = w1[0] ^ 0x36363636;
+  t1[1] = w1[1] ^ 0x36363636;
+  t1[2] = w1[2] ^ 0x36363636;
+  t1[3] = w1[3] ^ 0x36363636;
+  t2[0] = w2[0] ^ 0x36363636;
+  t2[1] = w2[1] ^ 0x36363636;
+  t2[2] = w2[2] ^ 0x36363636;
+  t2[3] = w2[3] ^ 0x36363636;
+  t3[0] = w3[0] ^ 0x36363636;
+  t3[1] = w3[1] ^ 0x36363636;
+  t3[2] = w3[2] ^ 0x36363636;
+  t3[3] = w3[3] ^ 0x36363636;
+  t4[0] = w4[0] ^ 0x36363636;
+  t4[1] = w4[1] ^ 0x36363636;
+  t4[2] = w4[2] ^ 0x36363636;
+  t4[3] = w4[3] ^ 0x36363636;
+  t5[0] = w5[0] ^ 0x36363636;
+  t5[1] = w5[1] ^ 0x36363636;
+  t5[2] = w5[2] ^ 0x36363636;
+  t5[3] = w5[3] ^ 0x36363636;
+  t6[0] = w6[0] ^ 0x36363636;
+  t6[1] = w6[1] ^ 0x36363636;
+  t6[2] = w6[2] ^ 0x36363636;
+  t6[3] = w6[3] ^ 0x36363636;
+  t7[0] = w7[0] ^ 0x36363636;
+  t7[1] = w7[1] ^ 0x36363636;
+  t7[2] = w7[2] ^ 0x36363636;
+  t7[3] = w7[3] ^ 0x36363636;
+
+  sha384_init_vector (&ctx->ipad);
+
+  sha384_update_vector_128 (&ctx->ipad, t0, t1, t2, t3, t4, t5, t6, t7, 128);
+
+  // opad: XOR every key word with 0x5c5c5c5c, then absorb the 128-byte result into a freshly initialized outer state
+
+  t0[0] = w0[0] ^ 0x5c5c5c5c;
+  t0[1] = w0[1] ^ 0x5c5c5c5c;
+  t0[2] = w0[2] ^ 0x5c5c5c5c;
+  t0[3] = w0[3] ^ 0x5c5c5c5c;
+  t1[0] = w1[0] ^ 0x5c5c5c5c;
+  t1[1] = w1[1] ^ 0x5c5c5c5c;
+  t1[2] = w1[2] ^ 0x5c5c5c5c;
+  t1[3] = w1[3] ^ 0x5c5c5c5c;
+  t2[0] = w2[0] ^ 0x5c5c5c5c;
+  t2[1] = w2[1] ^ 0x5c5c5c5c;
+  t2[2] = w2[2] ^ 0x5c5c5c5c;
+  t2[3] = w2[3] ^ 0x5c5c5c5c;
+  t3[0] = w3[0] ^ 0x5c5c5c5c;
+  t3[1] = w3[1] ^ 0x5c5c5c5c;
+  t3[2] = w3[2] ^ 0x5c5c5c5c;
+  t3[3] = w3[3] ^ 0x5c5c5c5c;
+  t4[0] = w4[0] ^ 0x5c5c5c5c;
+  t4[1] = w4[1] ^ 0x5c5c5c5c;
+  t4[2] = w4[2] ^ 0x5c5c5c5c;
+  t4[3] = w4[3] ^ 0x5c5c5c5c;
+  t5[0] = w5[0] ^ 0x5c5c5c5c;
+  t5[1] = w5[1] ^ 0x5c5c5c5c;
+  t5[2] = w5[2] ^ 0x5c5c5c5c;
+  t5[3] = w5[3] ^ 0x5c5c5c5c;
+  t6[0] = w6[0] ^ 0x5c5c5c5c;
+  t6[1] = w6[1] ^ 0x5c5c5c5c;
+  t6[2] = w6[2] ^ 0x5c5c5c5c;
+  t6[3] = w6[3] ^ 0x5c5c5c5c;
+  t7[0] = w7[0] ^ 0x5c5c5c5c;
+  t7[1] = w7[1] ^ 0x5c5c5c5c;
+  t7[2] = w7[2] ^ 0x5c5c5c5c;
+  t7[3] = w7[3] ^ 0x5c5c5c5c;
+
+  sha384_init_vector (&ctx->opad);
+
+  sha384_update_vector_128 (&ctx->opad, t0, t1, t2, t3, t4, t5, t6, t7, 128);
+}
+
+void sha384_hmac_init_vector (sha384_hmac_ctx_vector_t *ctx, const u32x *w, const int len)
+{ // Builds the 128-byte key block from key w of len bytes and passes it to sha384_hmac_init_vector_128.
+  u32x w0[4]; // w0..w7: the 32-word key block handed to sha384_hmac_init_vector_128
+  u32x w1[4];
+  u32x w2[4];
+  u32x w3[4];
+  u32x w4[4];
+  u32x w5[4];
+  u32x w6[4];
+  u32x w7[4];
+
+  if (len > 128) // key exceeds one 128-byte block: use its SHA-384 digest as the key instead
+  {
+    sha384_ctx_vector_t tmp;
+
+    sha384_init_vector (&tmp);
+
+    sha384_update_vector (&tmp, w, len);
+
+    sha384_final_vector (&tmp);
+
+    w0[0] = h32_from_64 (tmp.h[0]); // each 64-bit state word tmp.h[0..5] is split via h32_from_64 then l32_from_64 into 12 key words; the remaining 20 are zero
+    w0[1] = l32_from_64 (tmp.h[0]);
+    w0[2] = h32_from_64 (tmp.h[1]);
+    w0[3] = l32_from_64 (tmp.h[1]);
+    w1[0] = h32_from_64 (tmp.h[2]);
+    w1[1] = l32_from_64 (tmp.h[2]);
+    w1[2] = h32_from_64 (tmp.h[3]);
+    w1[3] = l32_from_64 (tmp.h[3]);
+    w2[0] = h32_from_64 (tmp.h[4]);
+    w2[1] = l32_from_64 (tmp.h[4]);
+    w2[2] = h32_from_64 (tmp.h[5]);
+    w2[3] = l32_from_64 (tmp.h[5]);
+    w3[0] = 0;
+    w3[1] = 0;
+    w3[2] = 0;
+    w3[3] = 0;
+    w4[0] = 0;
+    w4[1] = 0;
+    w4[2] = 0;
+    w4[3] = 0;
+    w5[0] = 0;
+    w5[1] = 0;
+    w5[2] = 0;
+    w5[3] = 0;
+    w6[0] = 0;
+    w6[1] = 0;
+    w6[2] = 0;
+    w6[3] = 0;
+    w7[0] = 0;
+    w7[1] = 0;
+    w7[2] = 0;
+    w7[3] = 0;
+  }
+  else // key fits in one block: take the first 32 words of w as-is
+  {
+    w0[0] = w[ 0]; // NOTE(review): all 32 words are read regardless of len - presumably w is zero-padded to 128 bytes; confirm against callers
+    w0[1] = w[ 1];
+    w0[2] = w[ 2];
+    w0[3] = w[ 3];
+    w1[0] = w[ 4];
+    w1[1] = w[ 5];
+    w1[2] = w[ 6];
+    w1[3] = w[ 7];
+    w2[0] = w[ 8];
+    w2[1] = w[ 9];
+    w2[2] = w[10];
+    w2[3] = w[11];
+    w3[0] = w[12];
+    w3[1] = w[13];
+    w3[2] = w[14];
+    w3[3] = w[15];
+    w4[0] = w[16];
+    w4[1] = w[17];
+    w4[2] = w[18];
+    w4[3] = w[19];
+    w5[0] = w[20];
+    w5[1] = w[21];
+    w5[2] = w[22];
+    w5[3] = w[23];
+    w6[0] = w[24];
+    w6[1] = w[25];
+    w6[2] = w[26];
+    w6[3] = w[27];
+    w7[0] = w[28];
+    w7[1] = w[29];
+    w7[2] = w[30];
+    w7[3] = w[31];
+  }
+
+  sha384_hmac_init_vector_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7);
+}
+
+void sha384_hmac_update_vector_128 (sha384_hmac_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const int len)
+{ // Feeds message data (w0..w7, len) into the inner (ipad) state only; the outer state is not touched here.
+  sha384_update_vector_128 (&ctx->ipad, w0, w1, w2, w3, w4, w5, w6, w7, len);
+}
+
+void sha384_hmac_update_vector (sha384_hmac_ctx_vector_t *ctx, const u32x *w, const int len)
+{ // Feeds message data (w, len) into the inner (ipad) state only; the outer state is not touched here.
+  sha384_update_vector (&ctx->ipad, w, len);
+}
+
+void sha384_hmac_final_vector (sha384_hmac_ctx_vector_t *ctx)
+{ // Completes the HMAC: finalizes the inner hash, then runs its digest through the outer state and finalizes that.
+  sha384_final_vector (&ctx->ipad); // ctx must be the vector context: ipad/opad are passed to sha384_*_vector functions
+
+  u32x t0[4]; // t0..t7: block holding the inner digest followed by zero words
+  u32x t1[4];
+  u32x t2[4];
+  u32x t3[4];
+  u32x t4[4];
+  u32x t5[4];
+  u32x t6[4];
+  u32x t7[4];
+
+  t0[0] = h32_from_64 (ctx->ipad.h[0]); // each 64-bit state word h[0..5] is split via h32_from_64 then l32_from_64
+  t0[1] = l32_from_64 (ctx->ipad.h[0]);
+  t0[2] = h32_from_64 (ctx->ipad.h[1]);
+  t0[3] = l32_from_64 (ctx->ipad.h[1]);
+  t1[0] = h32_from_64 (ctx->ipad.h[2]);
+  t1[1] = l32_from_64 (ctx->ipad.h[2]);
+  t1[2] = h32_from_64 (ctx->ipad.h[3]);
+  t1[3] = l32_from_64 (ctx->ipad.h[3]);
+  t2[0] = h32_from_64 (ctx->ipad.h[4]);
+  t2[1] = l32_from_64 (ctx->ipad.h[4]);
+  t2[2] = h32_from_64 (ctx->ipad.h[5]);
+  t2[3] = l32_from_64 (ctx->ipad.h[5]);
+  t3[0] = 0;
+  t3[1] = 0;
+  t3[2] = 0;
+  t3[3] = 0;
+  t4[0] = 0;
+  t4[1] = 0;
+  t4[2] = 0;
+  t4[3] = 0;
+  t5[0] = 0;
+  t5[1] = 0;
+  t5[2] = 0;
+  t5[3] = 0;
+  t6[0] = 0;
+  t6[1] = 0;
+  t6[2] = 0;
+  t6[3] = 0;
+  t7[0] = 0;
+  t7[1] = 0;
+  t7[2] = 0;
+  t7[3] = 0;
+
+  sha384_update_vector_128 (&ctx->opad, t0, t1, t2, t3, t4, t5, t6, t7, 48); // 48 = 6 state words * 8 bytes
+
+  sha384_final_vector (&ctx->opad); // NOTE(review): the HMAC result is presumably read from ctx->opad.h afterwards - confirm
+}
diff --git a/OpenCL/inc_hash_sha512.cl b/OpenCL/inc_hash_sha512.cl
index dc073157a..eaa170733 100644
--- a/OpenCL/inc_hash_sha512.cl
+++ b/OpenCL/inc_hash_sha512.cl
@@ -2231,3 +2231,256 @@ void sha512_final_vector (sha512_ctx_vector_t *ctx)
 
   sha512_transform_vector (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, ctx->h);
 }
+
+// HMAC + Vector
+
+typedef struct sha512_hmac_ctx_vector // pair of SHA-512 vector states used by the sha512_hmac_*_vector functions
+{
+  sha512_ctx_vector_t ipad; // inner hash: seeded with key ^ 0x36.. by init, then fed the message data
+  sha512_ctx_vector_t opad; // outer hash: seeded with key ^ 0x5c.. by init, fed the inner digest at final
+
+} sha512_hmac_ctx_vector_t;
+
+void sha512_hmac_init_vector_128 (sha512_hmac_ctx_vector_t *ctx, const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], const u32x w4[4], const u32x w5[4], const u32x w6[4], const u32x w7[4])
+{ // Initializes both HMAC states from a 128-byte key block passed as w0..w7 (32 words total); the inputs are only read.
+  u32x t0[4]; // t0..t7: scratch copy of the key block with the pad constant applied
+  u32x t1[4];
+  u32x t2[4];
+  u32x t3[4];
+  u32x t4[4];
+  u32x t5[4];
+  u32x t6[4];
+  u32x t7[4];
+
+  // ipad: XOR every key word with 0x36363636, then absorb the 128-byte result into a freshly initialized inner state
+
+  t0[0] = w0[0] ^ 0x36363636;
+  t0[1] = w0[1] ^ 0x36363636;
+  t0[2] = w0[2] ^ 0x36363636;
+  t0[3] = w0[3] ^ 0x36363636;
+  t1[0] = w1[0] ^ 0x36363636;
+  t1[1] = w1[1] ^ 0x36363636;
+  t1[2] = w1[2] ^ 0x36363636;
+  t1[3] = w1[3] ^ 0x36363636;
+  t2[0] = w2[0] ^ 0x36363636;
+  t2[1] = w2[1] ^ 0x36363636;
+  t2[2] = w2[2] ^ 0x36363636;
+  t2[3] = w2[3] ^ 0x36363636;
+  t3[0] = w3[0] ^ 0x36363636;
+  t3[1] = w3[1] ^ 0x36363636;
+  t3[2] = w3[2] ^ 0x36363636;
+  t3[3] = w3[3] ^ 0x36363636;
+  t4[0] = w4[0] ^ 0x36363636;
+  t4[1] = w4[1] ^ 0x36363636;
+  t4[2] = w4[2] ^ 0x36363636;
+  t4[3] = w4[3] ^ 0x36363636;
+  t5[0] = w5[0] ^ 0x36363636;
+  t5[1] = w5[1] ^ 0x36363636;
+  t5[2] = w5[2] ^ 0x36363636;
+  t5[3] = w5[3] ^ 0x36363636;
+  t6[0] = w6[0] ^ 0x36363636;
+  t6[1] = w6[1] ^ 0x36363636;
+  t6[2] = w6[2] ^ 0x36363636;
+  t6[3] = w6[3] ^ 0x36363636;
+  t7[0] = w7[0] ^ 0x36363636;
+  t7[1] = w7[1] ^ 0x36363636;
+  t7[2] = w7[2] ^ 0x36363636;
+  t7[3] = w7[3] ^ 0x36363636;
+
+  sha512_init_vector (&ctx->ipad);
+
+  sha512_update_vector_128 (&ctx->ipad, t0, t1, t2, t3, t4, t5, t6, t7, 128);
+
+  // opad: XOR every key word with 0x5c5c5c5c, then absorb the 128-byte result into a freshly initialized outer state
+
+  t0[0] = w0[0] ^ 0x5c5c5c5c;
+  t0[1] = w0[1] ^ 0x5c5c5c5c;
+  t0[2] = w0[2] ^ 0x5c5c5c5c;
+  t0[3] = w0[3] ^ 0x5c5c5c5c;
+  t1[0] = w1[0] ^ 0x5c5c5c5c;
+  t1[1] = w1[1] ^ 0x5c5c5c5c;
+  t1[2] = w1[2] ^ 0x5c5c5c5c;
+  t1[3] = w1[3] ^ 0x5c5c5c5c;
+  t2[0] = w2[0] ^ 0x5c5c5c5c;
+  t2[1] = w2[1] ^ 0x5c5c5c5c;
+  t2[2] = w2[2] ^ 0x5c5c5c5c;
+  t2[3] = w2[3] ^ 0x5c5c5c5c;
+  t3[0] = w3[0] ^ 0x5c5c5c5c;
+  t3[1] = w3[1] ^ 0x5c5c5c5c;
+  t3[2] = w3[2] ^ 0x5c5c5c5c;
+  t3[3] = w3[3] ^ 0x5c5c5c5c;
+  t4[0] = w4[0] ^ 0x5c5c5c5c;
+  t4[1] = w4[1] ^ 0x5c5c5c5c;
+  t4[2] = w4[2] ^ 0x5c5c5c5c;
+  t4[3] = w4[3] ^ 0x5c5c5c5c;
+  t5[0] = w5[0] ^ 0x5c5c5c5c;
+  t5[1] = w5[1] ^ 0x5c5c5c5c;
+  t5[2] = w5[2] ^ 0x5c5c5c5c;
+  t5[3] = w5[3] ^ 0x5c5c5c5c;
+  t6[0] = w6[0] ^ 0x5c5c5c5c;
+  t6[1] = w6[1] ^ 0x5c5c5c5c;
+  t6[2] = w6[2] ^ 0x5c5c5c5c;
+  t6[3] = w6[3] ^ 0x5c5c5c5c;
+  t7[0] = w7[0] ^ 0x5c5c5c5c;
+  t7[1] = w7[1] ^ 0x5c5c5c5c;
+  t7[2] = w7[2] ^ 0x5c5c5c5c;
+  t7[3] = w7[3] ^ 0x5c5c5c5c;
+
+  sha512_init_vector (&ctx->opad);
+
+  sha512_update_vector_128 (&ctx->opad, t0, t1, t2, t3, t4, t5, t6, t7, 128);
+}
+
+void sha512_hmac_init_vector (sha512_hmac_ctx_vector_t *ctx, const u32x *w, const int len)
+{ // Builds the 128-byte key block from key w of len bytes and passes it to sha512_hmac_init_vector_128.
+  u32x w0[4]; // w0..w7: the 32-word key block handed to sha512_hmac_init_vector_128
+  u32x w1[4];
+  u32x w2[4];
+  u32x w3[4];
+  u32x w4[4];
+  u32x w5[4];
+  u32x w6[4];
+  u32x w7[4];
+
+  if (len > 128) // key exceeds one 128-byte block: use its SHA-512 digest as the key instead
+  {
+    sha512_ctx_vector_t tmp;
+
+    sha512_init_vector (&tmp);
+
+    sha512_update_vector (&tmp, w, len);
+
+    sha512_final_vector (&tmp);
+
+    w0[0] = h32_from_64 (tmp.h[0]); // each 64-bit state word tmp.h[0..7] is split via h32_from_64 then l32_from_64 into 16 key words; the remaining 16 are zero
+    w0[1] = l32_from_64 (tmp.h[0]);
+    w0[2] = h32_from_64 (tmp.h[1]);
+    w0[3] = l32_from_64 (tmp.h[1]);
+    w1[0] = h32_from_64 (tmp.h[2]);
+    w1[1] = l32_from_64 (tmp.h[2]);
+    w1[2] = h32_from_64 (tmp.h[3]);
+    w1[3] = l32_from_64 (tmp.h[3]);
+    w2[0] = h32_from_64 (tmp.h[4]);
+    w2[1] = l32_from_64 (tmp.h[4]);
+    w2[2] = h32_from_64 (tmp.h[5]);
+    w2[3] = l32_from_64 (tmp.h[5]);
+    w3[0] = h32_from_64 (tmp.h[6]);
+    w3[1] = l32_from_64 (tmp.h[6]);
+    w3[2] = h32_from_64 (tmp.h[7]);
+    w3[3] = l32_from_64 (tmp.h[7]);
+    w4[0] = 0;
+    w4[1] = 0;
+    w4[2] = 0;
+    w4[3] = 0;
+    w5[0] = 0;
+    w5[1] = 0;
+    w5[2] = 0;
+    w5[3] = 0;
+    w6[0] = 0;
+    w6[1] = 0;
+    w6[2] = 0;
+    w6[3] = 0;
+    w7[0] = 0;
+    w7[1] = 0;
+    w7[2] = 0;
+    w7[3] = 0;
+  }
+  else // key fits in one block: take the first 32 words of w as-is
+  {
+    w0[0] = w[ 0]; // NOTE(review): all 32 words are read regardless of len - presumably w is zero-padded to 128 bytes; confirm against callers
+    w0[1] = w[ 1];
+    w0[2] = w[ 2];
+    w0[3] = w[ 3];
+    w1[0] = w[ 4];
+    w1[1] = w[ 5];
+    w1[2] = w[ 6];
+    w1[3] = w[ 7];
+    w2[0] = w[ 8];
+    w2[1] = w[ 9];
+    w2[2] = w[10];
+    w2[3] = w[11];
+    w3[0] = w[12];
+    w3[1] = w[13];
+    w3[2] = w[14];
+    w3[3] = w[15];
+    w4[0] = w[16];
+    w4[1] = w[17];
+    w4[2] = w[18];
+    w4[3] = w[19];
+    w5[0] = w[20];
+    w5[1] = w[21];
+    w5[2] = w[22];
+    w5[3] = w[23];
+    w6[0] = w[24];
+    w6[1] = w[25];
+    w6[2] = w[26];
+    w6[3] = w[27];
+    w7[0] = w[28];
+    w7[1] = w[29];
+    w7[2] = w[30];
+    w7[3] = w[31];
+  }
+
+  sha512_hmac_init_vector_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7);
+}
+
+void sha512_hmac_update_vector_128 (sha512_hmac_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const int len)
+{ // Feeds message data (w0..w7, len) into the inner (ipad) state only; the outer state is not touched here.
+  sha512_update_vector_128 (&ctx->ipad, w0, w1, w2, w3, w4, w5, w6, w7, len);
+}
+
+void sha512_hmac_update_vector (sha512_hmac_ctx_vector_t *ctx, const u32x *w, const int len)
+{ // Feeds message data (w, len) into the inner (ipad) state only; the outer state is not touched here.
+  sha512_update_vector (&ctx->ipad, w, len);
+}
+
+void sha512_hmac_final_vector (sha512_hmac_ctx_vector_t *ctx)
+{ // Completes the HMAC: finalizes the inner hash, then runs its digest through the outer state and finalizes that.
+  sha512_final_vector (&ctx->ipad); // ctx must be the vector context: ipad/opad are passed to sha512_*_vector functions
+
+  u32x t0[4]; // t0..t7: block holding the inner digest followed by zero words
+  u32x t1[4];
+  u32x t2[4];
+  u32x t3[4];
+  u32x t4[4];
+  u32x t5[4];
+  u32x t6[4];
+  u32x t7[4];
+
+  t0[0] = h32_from_64 (ctx->ipad.h[0]); // each 64-bit state word h[0..7] is split via h32_from_64 then l32_from_64
+  t0[1] = l32_from_64 (ctx->ipad.h[0]);
+  t0[2] = h32_from_64 (ctx->ipad.h[1]);
+  t0[3] = l32_from_64 (ctx->ipad.h[1]);
+  t1[0] = h32_from_64 (ctx->ipad.h[2]);
+  t1[1] = l32_from_64 (ctx->ipad.h[2]);
+  t1[2] = h32_from_64 (ctx->ipad.h[3]);
+  t1[3] = l32_from_64 (ctx->ipad.h[3]);
+  t2[0] = h32_from_64 (ctx->ipad.h[4]);
+  t2[1] = l32_from_64 (ctx->ipad.h[4]);
+  t2[2] = h32_from_64 (ctx->ipad.h[5]);
+  t2[3] = l32_from_64 (ctx->ipad.h[5]);
+  t3[0] = h32_from_64 (ctx->ipad.h[6]);
+  t3[1] = l32_from_64 (ctx->ipad.h[6]);
+  t3[2] = h32_from_64 (ctx->ipad.h[7]);
+  t3[3] = l32_from_64 (ctx->ipad.h[7]);
+  t4[0] = 0;
+  t4[1] = 0;
+  t4[2] = 0;
+  t4[3] = 0;
+  t5[0] = 0;
+  t5[1] = 0;
+  t5[2] = 0;
+  t5[3] = 0;
+  t6[0] = 0;
+  t6[1] = 0;
+  t6[2] = 0;
+  t6[3] = 0;
+  t7[0] = 0;
+  t7[1] = 0;
+  t7[2] = 0;
+  t7[3] = 0;
+
+  sha512_update_vector_128 (&ctx->opad, t0, t1, t2, t3, t4, t5, t6, t7, 64); // 64 = 8 state words * 8 bytes
+
+  sha512_final_vector (&ctx->opad); // NOTE(review): the HMAC result is presumably read from ctx->opad.h afterwards - confirm
+}
diff --git a/OpenCL/inc_hash_whirlpool.cl b/OpenCL/inc_hash_whirlpool.cl
index 52f5752e0..a069f9e7a 100644
--- a/OpenCL/inc_hash_whirlpool.cl
+++ b/OpenCL/inc_hash_whirlpool.cl
@@ -2736,6 +2736,167 @@ void whirlpool_final_vector (whirlpool_ctx_vector_t *ctx)
   whirlpool_transform_vector (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h, ctx->s_Ch, ctx->s_Cl);
 }
 
+// HMAC + Vector
+
+typedef struct whirlpool_hmac_ctx_vector
+{
+  whirlpool_ctx_vector_t ipad; // inner hash: primed with key ^ 0x36363636 at init, then absorbs the message via the hmac update functions
+  whirlpool_ctx_vector_t opad; // outer hash: primed with key ^ 0x5c5c5c5c at init, absorbs the finalized ipad state in whirlpool_hmac_final_vector
+
+} whirlpool_hmac_ctx_vector_t;
+
+void whirlpool_hmac_init_vector_64 (whirlpool_hmac_ctx_vector_t *ctx, const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256])
+{
+  // HMAC key setup for a key that is passed as sixteen words (w0..w3).
+  //
+  // Two hash states are prepared in ctx:
+  //   ipad - initialized, then updated with the key words ^ 0x36363636
+  //   opad - initialized, then updated with the key words ^ 0x5c5c5c5c
+  // Each masked key is submitted as a single update with len 64.
+  //
+  // ctx        : HMAC context whose ipad/opad states are (re)built here
+  // w0..w3     : key words; only read
+  // s_Ch, s_Cl : __local tables, forwarded to whirlpool_init_vector
+  //
+  // Message data is added afterwards through the hmac update functions;
+  // whirlpool_hmac_final_vector then chains the ipad digest into opad.
+
+  // scratch words for the masked key; w0..w3 are const and are read again
+  // for the second pad, so they must stay intact
+
+  u32x k0[4];
+  u32x k1[4];
+  u32x k2[4];
+  u32x k3[4];
+
+  // inner state: every key word XOR 0x36363636
+
+  for (int i = 0; i < 4; i++)
+  {
+    k0[i] = w0[i] ^ 0x36363636;
+    k1[i] = w1[i] ^ 0x36363636;
+    k2[i] = w2[i] ^ 0x36363636;
+    k3[i] = w3[i] ^ 0x36363636;
+  }
+
+  whirlpool_init_vector (&ctx->ipad, s_Ch, s_Cl);
+
+  whirlpool_update_vector_64 (&ctx->ipad, k0, k1, k2, k3, 64);
+
+  // outer state: every key word XOR 0x5c5c5c5c, rebuilt from w0..w3
+  // rather than derived from the buffers used for the inner pass
+
+  for (int i = 0; i < 4; i++)
+  {
+    k0[i] = w0[i] ^ 0x5c5c5c5c;
+    k1[i] = w1[i] ^ 0x5c5c5c5c;
+    k2[i] = w2[i] ^ 0x5c5c5c5c;
+    k3[i] = w3[i] ^ 0x5c5c5c5c;
+  }
+
+  whirlpool_init_vector (&ctx->opad, s_Ch, s_Cl);
+
+  whirlpool_update_vector_64 (&ctx->opad, k0, k1, k2, k3, 64);
+}
+
+void whirlpool_hmac_init_vector (whirlpool_hmac_ctx_vector_t *ctx, const u32x *w, const int len, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256])
+{
+  // HMAC key setup for a key of arbitrary length.
+  //
+  // The key is reduced to sixteen words, which are handed to
+  // whirlpool_hmac_init_vector_64:
+  //   len <= 64 : the first sixteen words of w are used unchanged; all
+  //               sixteen are read, whatever len is
+  //   len >  64 : w is hashed on its own first and the sixteen words of
+  //               the finalized state are used instead
+  //
+  // ctx        : HMAC context to set up
+  // w          : key words
+  // len        : key length, as passed on to whirlpool_update_vector
+  // s_Ch, s_Cl : __local tables, forwarded to the whirlpool init calls
+
+  u32x k0[4];
+  u32x k1[4];
+  u32x k2[4];
+  u32x k3[4];
+
+  if (len <= 64)
+  {
+    // short key: taken over word for word
+
+    for (int i = 0; i < 4; i++)
+    {
+      k0[i] = w[i +  0];
+      k1[i] = w[i +  4];
+      k2[i] = w[i +  8];
+      k3[i] = w[i + 12];
+    }
+  }
+  else
+  {
+    // long key: replaced by the state of a separate hash over the key
+
+    whirlpool_ctx_vector_t key_ctx;
+
+    whirlpool_init_vector (&key_ctx, s_Ch, s_Cl);
+
+    whirlpool_update_vector (&key_ctx, w, len);
+
+    whirlpool_final_vector (&key_ctx);
+
+    for (int i = 0; i < 4; i++)
+    {
+      k0[i] = key_ctx.h[i +  0];
+      k1[i] = key_ctx.h[i +  4];
+      k2[i] = key_ctx.h[i +  8];
+      k3[i] = key_ctx.h[i + 12];
+    }
+  }
+
+  whirlpool_hmac_init_vector_64 (ctx, k0, k1, k2, k3, s_Ch, s_Cl);
+}
+
+void whirlpool_hmac_update_vector_64 (whirlpool_hmac_ctx_vector_t *ctx, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const int len)
+{
+  whirlpool_update_vector_64 (&ctx->ipad, w0, w1, w2, w3, len); // HMAC update: message words go to the inner (ipad) hash only; opad is not touched here
+}
+
+void whirlpool_hmac_update_vector (whirlpool_hmac_ctx_vector_t *ctx, const u32x *w, const int len)
+{
+  whirlpool_update_vector (&ctx->ipad, w, len); // HMAC update: message words go to the inner (ipad) hash only; opad is not touched here
+}
+
+void whirlpool_hmac_final_vector (whirlpool_hmac_ctx_vector_t *ctx)
+{
+  // Finishes the HMAC: the inner (ipad) hash is finalized and its sixteen
+  // state words are submitted, as one update with len 64, to the outer
+  // (opad) hash, which is finalized in turn.
+  //
+  // ctx : HMAC context; both ipad and opad are modified, and the finished
+  //       state is left in ctx->opad
+
+  whirlpool_final_vector (&ctx->ipad);
+
+  u32x d0[4];
+  u32x d1[4];
+  u32x d2[4];
+  u32x d3[4];
+
+  // copy the inner state words out of the context into the update buffers
+
+  for (int i = 0; i < 4; i++)
+  {
+    d0[i] = ctx->ipad.h[i +  0];
+    d1[i] = ctx->ipad.h[i +  4];
+    d2[i] = ctx->ipad.h[i +  8];
+    d3[i] = ctx->ipad.h[i + 12];
+  }
+
+  whirlpool_update_vector_64 (&ctx->opad, d0, d1, d2, d3, 64);
+
+  whirlpool_final_vector (&ctx->opad);
+}
+
 #undef R
 #undef BOX
 #undef BOX_S