diff --git a/OpenCL/inc_ecc_secp256k1.cl b/OpenCL/inc_ecc_secp256k1.cl
new file mode 100644
index 000000000..92551d5e5
--- /dev/null
+++ b/OpenCL/inc_ecc_secp256k1.cl
@@ -0,0 +1,1820 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ *
+ * Furthermore, since elliptic curve operations are highly researched and optimized,
+ * we've consulted a lot of online resources to implement this, including several papers and
+ * example code.
+ *
+ * Credits where credits are due: there are a lot of nice projects that explain and/or optimize
+ * elliptic curve operations (especially elliptic curve multiplications by a scalar).
+ *
+ * We want to shout out following projects, which were quite helpful when implementing this:
+ * - secp256k1 by Pieter Wuille (https://github.com/bitcoin-core/secp256k1/, MIT)
+ * - secp256k1-cl by hhanh00 (https://github.com/hhanh00/secp256k1-cl/, MIT)
+ * - ec_pure_c by masterzorag (https://github.com/masterzorag/ec_pure_c/)
+ * - ecc-gmp by leivaburto (https://github.com/leivaburto/ecc-gmp)
+ * - micro-ecc by Ken MacKay (https://github.com/kmackay/micro-ecc/, BSD)
+ * - curve_example by willem (https://gist.github.com/nlitsme/c9031c7b9bf6bb009e5a)
+ * - py_ecc by Vitalik Buterin (https://github.com/ethereum/py_ecc/, MIT)
+ *
+ *
+ * Some BigNum operations are implemented similar to micro-ecc which is licensed under these terms:
+ *  Copyright 2014 Ken MacKay, 2-Clause BSD License
+ *
+ *  Redistribution and use in source and binary forms, with or without modification, are permitted
+ *  provided that the following conditions are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright notice, this list of
+ *     conditions and the following disclaimer.
+ *
+ *  2. Redistributions in binary form must reproduce the above copyright notice, this list of
+ *     conditions and the following disclaimer in the documentation and/or other materials
+ *     provided with the distribution.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
+ *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ *  AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+ *  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ *  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ *  OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * ATTENTION: this code is NOT meant to be used in security critical environments that are at risk
+ * of side-channel or timing attacks etc, its only purpose is to make it work fast for GPGPU
+ * (OpenCL/CUDA). Some attack vectors like side-channel and timing-attacks might be possible,
+ * because of some optimizations used within this code (non-constant time etc).
+ */
+
+/*
+ * Implementation considerations:
+ * point double and point add are implemented similar to algorithms mentioned in this 2011 paper:
+ * http://eprint.iacr.org/2011/338.pdf
+ * (Fast and Regular Algorithms for Scalar Multiplication over Elliptic Curves by Matthieu Rivain)
+ *
+ * In theory we could use the Jacobian Co-Z enhancement to get rid of the larger buffer caused by
+ * the z coordinates (and in this way reduce register pressure etc).
+ * For the Co-Z improvement there are a lot of fast algorithms, but we might still be faster
+ * with this implementation (b/c we allow non-constant time) without the Brier/Joye Montgomery-like
+ * ladder. Of course, this claim would need to be verified and tested to see which one is faster
+ * for our specific scenario at the end.
+ *
+ * A speedup could also be possible by using scalars converted to (w)NAF (non-adjacent form) or by
+ * just using the windowed (precomputed zi) method or similar improvements:
+ * The general idea of w-NAF would be to pre-compute some zi coefficients like below to reduce the
+ * costly point additions by using a non-binary ("signed") number system (values other than just
+ * 0 and 1, but ranging from -(2^(w-1)-1) to 2^(w-1)-1). This would work best with the left-to-right
+ * binary algorithm such that we could just add zi * P when adding point P (pre-compute all the
+ * possible zi * P values because the x/y coordinates are known before the kernel starts):
+ *
+ *  // Example with window size w = 2 (i.e. mod 4 => & 3):
+ *  // 173 => 1 0 -1 0 -1 0 -1 0 1 = 2^8 - 2^6 - 2^4 - 2^2 + 1
+ *  int e = 0b10101101;   // 173
+ *  int z[8 + 1] = { 0 }; // our zi/di, we need one extra slot to make the subtraction work
+ *
+ *  int i = 0;
+ *
+ *  while (e)
+ *  {
+ *    if (e & 1)
+ *    {
+ *      // for window size w = 3 it would be:
+ *      // => 2^(w-0) = 2^3 = 8
+ *      // => 2^(w-1) = 2^2 = 4
+ *
+ *      int bit; // = 2 - (e & 3) for w = 2
+ *
+ *      if ((e & 3) >= 2) // e % 4 == e & 3, use (e & 7) >= 4 for w = 3
+ *        bit = (e & 3) - 4; // (e & 7) - 8 for w = 3
+ *      else
+ *        bit = e & 3; // e & 7 for w = 3
+ *
+ *      z[i] = bit;
+ *      e   -= bit;
+ *    }
+ *
+ *    e >>= 1; // e / 2
+ *    i++;
+ *  }
+*/
+
+#include "inc_ecc_secp256k1.h"
+
+/*
+ * 256-bit subtraction: r = a - b (modulo 2^256), 8 words each, least significant word first.
+ *
+ * Returns the borrow out of the top word: 1 if the subtraction wrapped around (a < b), else 0.
+ * r may be the same array as a or b: every word is read before the matching word is written.
+ */
+DECLSPEC u32 sub (u32 r[8], const u32 a[8], const u32 b[8])
+{
+  u32 borrow = 0;
+
+  for (u32 idx = 0; idx < 8; idx++)
+  {
+    const u32 minuend = a[idx];
+
+    const u32 res = minuend - b[idx] - borrow;
+
+    // res == minuend: (b[idx] + borrow) was 0 or 2^32, in both cases the borrow stays as it is
+
+    if (res > minuend)
+    {
+      borrow = 1;
+    }
+    else if (res < minuend)
+    {
+      borrow = 0;
+    }
+
+    r[idx] = res;
+  }
+
+  return borrow;
+}
+
+/*
+ * 256-bit addition: r = a + b (modulo 2^256), 8 words each, least significant word first.
+ *
+ * Returns the carry out of the top word: 1 if the sum did not fit into 256 bits, else 0.
+ * r may be the same array as a or b: every word is read before the matching word is written.
+ */
+DECLSPEC u32 add (u32 r[8], const u32 a[8], const u32 b[8])
+{
+  u32 carry = 0;
+
+  for (u32 idx = 0; idx < 8; idx++)
+  {
+    const u32 addend = a[idx];
+
+    const u32 sum = addend + b[idx] + carry;
+
+    // sum == addend: (b[idx] + carry) was 0 or 2^32, in both cases the carry stays as it is
+
+    if (sum < addend)
+    {
+      carry = 1;
+    }
+    else if (sum > addend)
+    {
+      carry = 0;
+    }
+
+    r[idx] = sum;
+  }
+
+  return carry;
+}
+
+/*
+ * Modular subtraction: r = a - b, with the prime SECP256K1_P added back once if the plain
+ * 256-bit subtraction had to borrow. With both inputs already below the prime the result is
+ * below the prime again.
+ */
+DECLSPEC void sub_mod (u32 r[8], const u32 a[8], const u32 b[8])
+{
+  // no borrow: a >= b, r already holds the final difference
+
+  if (sub (r, a, b) == 0) return;
+
+  // a < b: r wrapped around 2^256, adding the prime undoes that (the carry of add () is dropped)
+
+  u32 prime[8] =
+  {
+    SECP256K1_P0, SECP256K1_P1, SECP256K1_P2, SECP256K1_P3,
+    SECP256K1_P4, SECP256K1_P5, SECP256K1_P6, SECP256K1_P7
+  };
+
+  add (r, r, prime);
+}
+
+/*
+ * Modular addition: r = a + b, followed by one subtraction of the prime SECP256K1_P whenever the
+ * 256-bit addition carried out or the sum is greater than or equal to the prime.
+ */
+DECLSPEC void add_mod (u32 r[8], const u32 a[8], const u32 b[8])
+{
+  u32 prime[8] =
+  {
+    SECP256K1_P0, SECP256K1_P1, SECP256K1_P2, SECP256K1_P3,
+    SECP256K1_P4, SECP256K1_P5, SECP256K1_P6, SECP256K1_P7
+  };
+
+  // a carry out of the addition means the real sum is above 2^256 and therefore above the prime
+
+  u32 reduce = add (r, a, b);
+
+  if (reduce == 0)
+  {
+    // compare r with the prime starting at the most significant word, the first word that
+    // differs decides; if all words are equal (r == prime) the subtraction is needed as well
+
+    reduce = 1;
+
+    for (int w = 7; w >= 0; w--)
+    {
+      if (r[w] == prime[w]) continue;
+
+      reduce = (r[w] > prime[w]);
+
+      break;
+    }
+  }
+
+  if (reduce)
+  {
+    sub (r, r, prime);
+  }
+}
+
+/*
+ * Reduces the 512-bit value in n modulo the secp256k1 group order, in place.
+ *
+ * n: 16 words with the most significant word first (n[0] is the top word, n[15] the lowest one).
+ *    On return n holds the remainder of the division by SECP256K1_N7...SECP256K1_N0.
+ */
+DECLSPEC void mod_512 (u32 n[16])
+{
+  // we need to perform a modulo operation with 512-bit % 256-bit (bignum modulo):
+  // the modulus is the secp256k1 group order
+
+  // ATTENTION: for this function the byte-order is reversed (most significant bytes
+  // at the left)
+
+  /*
+    the general modulo by shift and subtract code (a = a % b):
+
+    x = b;
+
+    t = a >> 1;
+
+    while (x <= t) x <<= 1;
+
+    while (a >= b)
+    {
+      if (a >= x) a -= x;
+
+      x >>= 1;
+    }
+
+    return a; // remainder
+  */
+
+  u32 a[16];
+
+  a[ 0] = n[ 0];
+  a[ 1] = n[ 1];
+  a[ 2] = n[ 2];
+  a[ 3] = n[ 3];
+  a[ 4] = n[ 4];
+  a[ 5] = n[ 5];
+  a[ 6] = n[ 6];
+  a[ 7] = n[ 7];
+  a[ 8] = n[ 8];
+  a[ 9] = n[ 9];
+  a[10] = n[10];
+  a[11] = n[11];
+  a[12] = n[12];
+  a[13] = n[13];
+  a[14] = n[14];
+  a[15] = n[15];
+
+  // b = the group order, extended to 512 bits (upper half is zero)
+
+  u32 b[16];
+
+  b[ 0] = 0x00000000;
+  b[ 1] = 0x00000000;
+  b[ 2] = 0x00000000;
+  b[ 3] = 0x00000000;
+  b[ 4] = 0x00000000;
+  b[ 5] = 0x00000000;
+  b[ 6] = 0x00000000;
+  b[ 7] = 0x00000000;
+  b[ 8] = SECP256K1_N7;
+  b[ 9] = SECP256K1_N6;
+  b[10] = SECP256K1_N5;
+  b[11] = SECP256K1_N4;
+  b[12] = SECP256K1_N3;
+  b[13] = SECP256K1_N2;
+  b[14] = SECP256K1_N1;
+  b[15] = SECP256K1_N0;
+
+  /*
+   * Start:
+   */
+
+  // x = b (but with a fast "shift" trick to avoid the while loop)
+
+  u32 x[16];
+
+  x[ 0] = b[ 8]; // this is a trick: we just put the group order's most significant bit all the
+  x[ 1] = b[ 9]; // way to the top to avoid doing the initial: while (x <= t) x <<= 1
+  x[ 2] = b[10];
+  x[ 3] = b[11];
+  x[ 4] = b[12];
+  x[ 5] = b[13];
+  x[ 6] = b[14];
+  x[ 7] = b[15];
+  x[ 8] = 0x00000000;
+  x[ 9] = 0x00000000;
+  x[10] = 0x00000000;
+  x[11] = 0x00000000;
+  x[12] = 0x00000000;
+  x[13] = 0x00000000;
+  x[14] = 0x00000000;
+  x[15] = 0x00000000;
+
+  // a >= b
+  // (b[0] is always 0, so the condition below never fails: the loop is left by the "break"
+  // statements as soon as a < b)
+
+  while (a[0] >= b[0])
+  {
+    // bit i of l1 is set if a[i] < b[i], bit i of e1 is set if a[i] == b[i]
+
+    const u32 l1 = (a[ 0]  < b[ 0]) <<  0
+                 | (a[ 1]  < b[ 1]) <<  1
+                 | (a[ 2]  < b[ 2]) <<  2
+                 | (a[ 3]  < b[ 3]) <<  3
+                 | (a[ 4]  < b[ 4]) <<  4
+                 | (a[ 5]  < b[ 5]) <<  5
+                 | (a[ 6]  < b[ 6]) <<  6
+                 | (a[ 7]  < b[ 7]) <<  7
+                 | (a[ 8]  < b[ 8]) <<  8
+                 | (a[ 9]  < b[ 9]) <<  9
+                 | (a[10]  < b[10]) << 10
+                 | (a[11]  < b[11]) << 11
+                 | (a[12]  < b[12]) << 12
+                 | (a[13]  < b[13]) << 13
+                 | (a[14]  < b[14]) << 14
+                 | (a[15]  < b[15]) << 15;
+
+    const u32 e1 = (a[ 0] == b[ 0]) <<  0
+                 | (a[ 1] == b[ 1]) <<  1
+                 | (a[ 2] == b[ 2]) <<  2
+                 | (a[ 3] == b[ 3]) <<  3
+                 | (a[ 4] == b[ 4]) <<  4
+                 | (a[ 5] == b[ 5]) <<  5
+                 | (a[ 6] == b[ 6]) <<  6
+                 | (a[ 7] == b[ 7]) <<  7
+                 | (a[ 8] == b[ 8]) <<  8
+                 | (a[ 9] == b[ 9]) <<  9
+                 | (a[10] == b[10]) << 10
+                 | (a[11] == b[11]) << 11
+                 | (a[12] == b[12]) << 12
+                 | (a[13] == b[13]) << 13
+                 | (a[14] == b[14]) << 14
+                 | (a[15] == b[15]) << 15;
+
+    // a < b if a word of a is smaller and all more significant words are equal: we are done
+
+    if (l1)
+    {
+      if (l1 & 0x0001)                              break;
+      if (l1 & 0x0002) if ((e1 & 0x0001) == 0x0001) break;
+      if (l1 & 0x0004) if ((e1 & 0x0003) == 0x0003) break;
+      if (l1 & 0x0008) if ((e1 & 0x0007) == 0x0007) break;
+      if (l1 & 0x0010) if ((e1 & 0x000f) == 0x000f) break;
+      if (l1 & 0x0020) if ((e1 & 0x001f) == 0x001f) break;
+      if (l1 & 0x0040) if ((e1 & 0x003f) == 0x003f) break;
+      if (l1 & 0x0080) if ((e1 & 0x007f) == 0x007f) break;
+      if (l1 & 0x0100) if ((e1 & 0x00ff) == 0x00ff) break;
+      if (l1 & 0x0200) if ((e1 & 0x01ff) == 0x01ff) break;
+      if (l1 & 0x0400) if ((e1 & 0x03ff) == 0x03ff) break;
+      if (l1 & 0x0800) if ((e1 & 0x07ff) == 0x07ff) break;
+      if (l1 & 0x1000) if ((e1 & 0x0fff) == 0x0fff) break;
+      if (l1 & 0x2000) if ((e1 & 0x1fff) == 0x1fff) break;
+      if (l1 & 0x4000) if ((e1 & 0x3fff) == 0x3fff) break;
+      if (l1 & 0x8000) if ((e1 & 0x7fff) == 0x7fff) break;
+    }
+
+    // r = x (copy it to have the original values for the subtraction)
+
+    u32 r[16];
+
+    r[ 0] = x[ 0];
+    r[ 1] = x[ 1];
+    r[ 2] = x[ 2];
+    r[ 3] = x[ 3];
+    r[ 4] = x[ 4];
+    r[ 5] = x[ 5];
+    r[ 6] = x[ 6];
+    r[ 7] = x[ 7];
+    r[ 8] = x[ 8];
+    r[ 9] = x[ 9];
+    r[10] = x[10];
+    r[11] = x[11];
+    r[12] = x[12];
+    r[13] = x[13];
+    r[14] = x[14];
+    r[15] = x[15];
+
+    // x >>= 1
+
+    x[15] = x[15] >> 1 | (x[14] & 1) << 31;
+    x[14] = x[14] >> 1 | (x[13] & 1) << 31;
+    x[13] = x[13] >> 1 | (x[12] & 1) << 31;
+    x[12] = x[12] >> 1 | (x[11] & 1) << 31;
+    x[11] = x[11] >> 1 | (x[10] & 1) << 31;
+    x[10] = x[10] >> 1 | (x[ 9] & 1) << 31;
+    x[ 9] = x[ 9] >> 1 | (x[ 8] & 1) << 31;
+    x[ 8] = x[ 8] >> 1 | (x[ 7] & 1) << 31;
+    x[ 7] = x[ 7] >> 1 | (x[ 6] & 1) << 31;
+    x[ 6] = x[ 6] >> 1 | (x[ 5] & 1) << 31;
+    x[ 5] = x[ 5] >> 1 | (x[ 4] & 1) << 31;
+    x[ 4] = x[ 4] >> 1 | (x[ 3] & 1) << 31;
+    x[ 3] = x[ 3] >> 1 | (x[ 2] & 1) << 31;
+    x[ 2] = x[ 2] >> 1 | (x[ 1] & 1) << 31;
+    x[ 1] = x[ 1] >> 1 | (x[ 0] & 1) << 31;
+    x[ 0] = x[ 0] >> 1;
+
+    // if (a >= r) a -= r;
+
+    const u32 l2 = (a[ 0]  < r[ 0]) <<  0
+                 | (a[ 1]  < r[ 1]) <<  1
+                 | (a[ 2]  < r[ 2]) <<  2
+                 | (a[ 3]  < r[ 3]) <<  3
+                 | (a[ 4]  < r[ 4]) <<  4
+                 | (a[ 5]  < r[ 5]) <<  5
+                 | (a[ 6]  < r[ 6]) <<  6
+                 | (a[ 7]  < r[ 7]) <<  7
+                 | (a[ 8]  < r[ 8]) <<  8
+                 | (a[ 9]  < r[ 9]) <<  9
+                 | (a[10]  < r[10]) << 10
+                 | (a[11]  < r[11]) << 11
+                 | (a[12]  < r[12]) << 12
+                 | (a[13]  < r[13]) << 13
+                 | (a[14]  < r[14]) << 14
+                 | (a[15]  < r[15]) << 15;
+
+    const u32 e2 = (a[ 0] == r[ 0]) <<  0
+                 | (a[ 1] == r[ 1]) <<  1
+                 | (a[ 2] == r[ 2]) <<  2
+                 | (a[ 3] == r[ 3]) <<  3
+                 | (a[ 4] == r[ 4]) <<  4
+                 | (a[ 5] == r[ 5]) <<  5
+                 | (a[ 6] == r[ 6]) <<  6
+                 | (a[ 7] == r[ 7]) <<  7
+                 | (a[ 8] == r[ 8]) <<  8
+                 | (a[ 9] == r[ 9]) <<  9
+                 | (a[10] == r[10]) << 10
+                 | (a[11] == r[11]) << 11
+                 | (a[12] == r[12]) << 12
+                 | (a[13] == r[13]) << 13
+                 | (a[14] == r[14]) << 14
+                 | (a[15] == r[15]) << 15;
+
+    // a < r: nothing to subtract in this round, try again with the shifted x
+
+    if (l2)
+    {
+      if (l2 & 0x0001)                              continue;
+      if (l2 & 0x0002) if ((e2 & 0x0001) == 0x0001) continue;
+      if (l2 & 0x0004) if ((e2 & 0x0003) == 0x0003) continue;
+      if (l2 & 0x0008) if ((e2 & 0x0007) == 0x0007) continue;
+      if (l2 & 0x0010) if ((e2 & 0x000f) == 0x000f) continue;
+      if (l2 & 0x0020) if ((e2 & 0x001f) == 0x001f) continue;
+      if (l2 & 0x0040) if ((e2 & 0x003f) == 0x003f) continue;
+      if (l2 & 0x0080) if ((e2 & 0x007f) == 0x007f) continue;
+      if (l2 & 0x0100) if ((e2 & 0x00ff) == 0x00ff) continue;
+      if (l2 & 0x0200) if ((e2 & 0x01ff) == 0x01ff) continue;
+      if (l2 & 0x0400) if ((e2 & 0x03ff) == 0x03ff) continue;
+      if (l2 & 0x0800) if ((e2 & 0x07ff) == 0x07ff) continue;
+      if (l2 & 0x1000) if ((e2 & 0x0fff) == 0x0fff) continue;
+      if (l2 & 0x2000) if ((e2 & 0x1fff) == 0x1fff) continue;
+      if (l2 & 0x4000) if ((e2 & 0x3fff) == 0x3fff) continue;
+      if (l2 & 0x8000) if ((e2 & 0x7fff) == 0x7fff) continue;
+    }
+
+    // subtract (a -= r):
+    // walk from the least significant word (index 15) to the most significant one (index 0) and
+    // carry the borrow along (same scheme as in sub ()). Looking at each word difference on its
+    // own is not enough: a word with a[i] == r[i] that receives a borrow must pass it on.
+
+    u32 borrow = 0;
+
+    for (int i = 15; i >= 0; i--)
+    {
+      const u32 cur = a[i];
+
+      const u32 diff = cur - r[i] - borrow;
+
+      if (diff != cur) borrow = (diff > cur);
+
+      a[i] = diff;
+    }
+  }
+
+  // a is now smaller than the group order: hand the remainder back to the caller
+
+  n[ 0] = a[ 0];
+  n[ 1] = a[ 1];
+  n[ 2] = a[ 2];
+  n[ 3] = a[ 3];
+  n[ 4] = a[ 4];
+  n[ 5] = a[ 5];
+  n[ 6] = a[ 6];
+  n[ 7] = a[ 7];
+  n[ 8] = a[ 8];
+  n[ 9] = a[ 9];
+  n[10] = a[10];
+  n[11] = a[11];
+  n[12] = a[12];
+  n[13] = a[13];
+  n[14] = a[14];
+  n[15] = a[15];
+}
+
+/*
+ * Modular multiplication: r = (a * b) % SECP256K1_P.
+ *
+ * a, b: 8 words each, least significant word first.
+ * r:    receives the 8 result words. It may be the same array as a and/or b, because the full
+ *       product is built in a local buffer before r is written for the first time.
+ *
+ * The 512-bit product is folded down twice with 2^256 = 2^32 + 977 (mod p) and then brought
+ * below p with conditional subtractions (r == p is reduced to 0 as well, like in add_mod ()).
+ */
+DECLSPEC void mul_mod (u32 r[8], const u32 a[8], const u32 b[8]) // TODO get rid of u64 ?
+{
+  u32 t[16] = { 0 }; // we need up to double the space (2 * 8)
+
+  /*
+   * First start with the basic a * b multiplication:
+   * (column by column: t1:t0 is the running sum of the column, c counts its overflows)
+   */
+
+  u32 t0 = 0;
+  u32 t1 = 0;
+  u32 c  = 0;
+
+  for (u32 i = 0; i < 8; i++)
+  {
+    for (u32 j = 0; j <= i; j++)
+    {
+      u64 p = ((u64) a[j]) * b[i - j];
+
+      u64 d = ((u64) t1) << 32 | t0;
+
+      d += p;
+
+      t0 = (u32) d;
+      t1 = d >> 32;
+
+      c += d < p; // carry
+    }
+
+    t[i] = t0;
+
+    // move on to the next column: shift the accumulator down by one word
+
+    t0 = t1;
+    t1 = c;
+
+    c = 0;
+  }
+
+  for (u32 i = 8; i < 15; i++)
+  {
+    for (u32 j = i - 7; j < 8; j++)
+    {
+      u64 p = ((u64) a[j]) * b[i - j];
+
+      u64 d = ((u64) t1) << 32 | t0;
+
+      d += p;
+
+      t0 = (u32) d;
+      t1 = d >> 32;
+
+      c += d < p;
+    }
+
+    t[i] = t0;
+
+    t0 = t1;
+    t1 = c;
+
+    c = 0;
+  }
+
+  t[15] = t0;
+
+
+
+  /*
+   * Now do the modulo operation:
+   * (r = t % p)
+   *
+   * http://www.isys.uni-klu.ac.at/PDF/2001-0126-MT.pdf (p.354 or p.9 in that document)
+   */
+
+  u32 tmp[16] = { 0 };
+
+  // c is 0 at this point (it was reset at the end of the last column above)
+
+  // Note: SECP256K1_P = 2^256 - 2^32 - 977 (0x03d1 = 977)
+  // multiply t[8]...t[15] by omega:
+
+  for (u32 i = 0, j = 8; i < 8; i++, j++)
+  {
+    u64 p = ((u64) 0x03d1) * t[j] + c;
+
+    tmp[i] = (u32) p;
+
+    c = p >> 32;
+  }
+
+  tmp[8] = c;
+
+  // the 2^32 part of omega: add t[8]...t[15] once more, shifted up by one word
+
+  c = add (tmp + 1, tmp + 1, t + 8); // modifies tmp[1]...tmp[8]
+
+  tmp[9] = c;
+
+
+  // r = t + tmp
+
+  c = add (r, t, tmp);
+
+  // multiply tmp[8]...tmp[15] (what did not fit into 256 bits in the first round) by omega:
+
+  u32 c2 = 0;
+
+  // t is reused as scratch space: t[0]...t[9] are overwritten below, only t[0]...t[7] are used
+
+  for (u32 i = 0, j = 8; i < 8; i++, j++)
+  {
+    u64 p = ((u64) 0x3d1) * tmp[j] + c2;
+
+    t[i] = (u32) p;
+
+    c2 = p >> 32;
+  }
+
+  t[8] = c2;
+
+  c2 = add (t + 1, t + 1, tmp + 8); // modifies t[1]...t[8]
+
+  t[9] = c2;
+
+
+  // r = r + t
+
+  c2 = add (r, r, t);
+
+  c += c2;
+
+  t[0] = SECP256K1_P0;
+  t[1] = SECP256K1_P1;
+  t[2] = SECP256K1_P2;
+  t[3] = SECP256K1_P3;
+  t[4] = SECP256K1_P4;
+  t[5] = SECP256K1_P5;
+  t[6] = SECP256K1_P6;
+  t[7] = SECP256K1_P7;
+
+  // every carry out of the two additions above is worth 2^256: take p away once per carry
+
+  for (u32 i = c; i > 0; i--)
+  {
+    sub (r, r, t);
+  }
+
+  // final check: subtract p once more if r >= p (the first word that differs, starting at the
+  // most significant one, decides; all words equal means r == p, which has to become 0)
+
+  u32 ge = 1;
+
+  for (int i = 7; i >= 0; i--)
+  {
+    if (r[i] < t[i])
+    {
+      ge = 0;
+
+      break;
+    }
+
+    if (r[i] > t[i]) break;
+  }
+
+  if (ge)
+  {
+    sub (r, r, t);
+  }
+}
+
+/*
+ * Replaces r with r ^ ((p + 1) / 4) % p, where p is SECP256K1_P (left-to-right square and
+ * multiply). As derived below this is a square root of the input if the input is a square
+ * modulo p; the function itself does not check whether that is the case.
+ */
+DECLSPEC void sqrt_mod (u32 r[8])
+{
+  // Fermat's Little Theorem
+  // secp256k1: y^2 = x^3 + 7 % p
+  // y ^ (p - 1) = 1
+  // y ^ (p - 1) = (y^2) ^ ((p - 1) / 2) = 1 => y^2 = (y^2) ^ (((p - 1) / 2) + 1)
+  // => y = (y^2) ^ ((((p - 1) / 2) + 1) / 2)
+  // y = (y^2) ^ (((p - 1 + 2) / 2) / 2) = (y^2) ^ ((p + 1) / 4)
+
+  // y1 = (x^3 + 7) ^ ((p + 1) / 4)
+  // y2 = p - y1 (or y2 = y1 * -1 % p)
+
+  // s = p + 1, the division by 4 is done by ignoring the two lowest bits in the loop below
+
+  u32 s[8];
+
+  s[0] = SECP256K1_P0 + 1; //  because of (p + 1) / 4 or use add (s, s, 1)
+  s[1] = SECP256K1_P1;
+  s[2] = SECP256K1_P2;
+  s[3] = SECP256K1_P3;
+  s[4] = SECP256K1_P4;
+  s[5] = SECP256K1_P5;
+  s[6] = SECP256K1_P6;
+  s[7] = SECP256K1_P7;
+
+  // t = 1 (the running result)
+
+  u32 t[8] = { 0 };
+
+  t[0] = 1;
+
+  for (u32 i = 255; i > 1; i--) // we just skip the last 2 multiplications (=> exp / 4)
+  {
+    mul_mod (t, t, t); // t * t
+
+    // bit i of the exponent: word i / 32, bit i % 32
+    // (unsigned literal: shifting a signed 1 into bit 31 is not defined in ISO C)
+
+    u32 idx  = i >> 5;
+    u32 mask = 1u << (i & 0x1f);
+
+    if (s[idx] & mask)
+    {
+      mul_mod (t, t, r); // t * r
+    }
+  }
+
+  r[0] = t[0];
+  r[1] = t[1];
+  r[2] = t[2];
+  r[3] = t[3];
+  r[4] = t[4];
+  r[5] = t[5];
+  r[6] = t[6];
+  r[7] = t[7];
+}
+
+/*
+ * Modular inverse, in place: (inverse (a, p) * a) % p == 1 (or think of a * a^-1 = a / a = 1),
+ * where p is SECP256K1_P. On return a holds the inverse (8 words, least significant word first).
+ *
+ * The loop halves / subtracts t0 (starts as a) and t1 (starts as p) until both are equal and
+ * repeats every step modulo p on t2 (starts as 1) and t3 (starts as 0); t2 is the result.
+ *
+ * NOTE(review): for a == 0 the loop does not seem to terminate (t0 stays 0 and never becomes
+ * equal to t1) and the guard for this case is commented out below - confirm that no caller can
+ * pass 0.
+ */
+
+DECLSPEC void inv_mod (u32 a[8])
+{
+  // How often does this really happen? it should "almost" never happen (but would be safer)
+  // if ((a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7]) == 0) return;
+
+  u32 t0[8];
+
+  t0[0] = a[0];
+  t0[1] = a[1];
+  t0[2] = a[2];
+  t0[3] = a[3];
+  t0[4] = a[4];
+  t0[5] = a[5];
+  t0[6] = a[6];
+  t0[7] = a[7];
+
+  u32 p[8];
+
+  p[0] = SECP256K1_P0;
+  p[1] = SECP256K1_P1;
+  p[2] = SECP256K1_P2;
+  p[3] = SECP256K1_P3;
+  p[4] = SECP256K1_P4;
+  p[5] = SECP256K1_P5;
+  p[6] = SECP256K1_P6;
+  p[7] = SECP256K1_P7;
+
+  u32 t1[8];
+
+  t1[0] = SECP256K1_P0;
+  t1[1] = SECP256K1_P1;
+  t1[2] = SECP256K1_P2;
+  t1[3] = SECP256K1_P3;
+  t1[4] = SECP256K1_P4;
+  t1[5] = SECP256K1_P5;
+  t1[6] = SECP256K1_P6;
+  t1[7] = SECP256K1_P7;
+
+  u32 t2[8] = { 0 };
+
+  t2[0] = 0x00000001;
+
+  u32 t3[8] = { 0 };
+
+  u32 b = (t0[0] != t1[0]) // b != 0 as long as t0 and t1 differ in at least one word
+        | (t0[1] != t1[1])
+        | (t0[2] != t1[2])
+        | (t0[3] != t1[3])
+        | (t0[4] != t1[4])
+        | (t0[5] != t1[5])
+        | (t0[6] != t1[6])
+        | (t0[7] != t1[7]);
+
+  while (b)
+  {
+    if ((t0[0] & 1) == 0) // t0 is even: t0 = t0 / 2 and t2 = t2 / 2 (mod p)
+    {
+      t0[0] = t0[0] >> 1 | t0[1] << 31;
+      t0[1] = t0[1] >> 1 | t0[2] << 31;
+      t0[2] = t0[2] >> 1 | t0[3] << 31;
+      t0[3] = t0[3] >> 1 | t0[4] << 31;
+      t0[4] = t0[4] >> 1 | t0[5] << 31;
+      t0[5] = t0[5] >> 1 | t0[6] << 31;
+      t0[6] = t0[6] >> 1 | t0[7] << 31;
+      t0[7] = t0[7] >> 1;
+
+      u32 c = 0;
+
+      if (t2[0] & 1) c = add (t2, t2, p); // odd: add p to make it even, c keeps the carry
+
+      t2[0] = t2[0] >> 1 | t2[1] << 31;
+      t2[1] = t2[1] >> 1 | t2[2] << 31;
+      t2[2] = t2[2] >> 1 | t2[3] << 31;
+      t2[3] = t2[3] >> 1 | t2[4] << 31;
+      t2[4] = t2[4] >> 1 | t2[5] << 31;
+      t2[5] = t2[5] >> 1 | t2[6] << 31;
+      t2[6] = t2[6] >> 1 | t2[7] << 31;
+      t2[7] = t2[7] >> 1 | c     << 31; // the carry of the addition becomes the top bit
+    }
+    else if ((t1[0] & 1) == 0) // t1 is even: t1 = t1 / 2 and t3 = t3 / 2 (mod p)
+    {
+      t1[0] = t1[0] >> 1 | t1[1] << 31;
+      t1[1] = t1[1] >> 1 | t1[2] << 31;
+      t1[2] = t1[2] >> 1 | t1[3] << 31;
+      t1[3] = t1[3] >> 1 | t1[4] << 31;
+      t1[4] = t1[4] >> 1 | t1[5] << 31;
+      t1[5] = t1[5] >> 1 | t1[6] << 31;
+      t1[6] = t1[6] >> 1 | t1[7] << 31;
+      t1[7] = t1[7] >> 1;
+
+      u32 c = 0;
+
+      if (t3[0] & 1) c = add (t3, t3, p); // odd: add p to make it even, c keeps the carry
+
+      t3[0] = t3[0] >> 1 | t3[1] << 31;
+      t3[1] = t3[1] >> 1 | t3[2] << 31;
+      t3[2] = t3[2] >> 1 | t3[3] << 31;
+      t3[3] = t3[3] >> 1 | t3[4] << 31;
+      t3[4] = t3[4] >> 1 | t3[5] << 31;
+      t3[5] = t3[5] >> 1 | t3[6] << 31;
+      t3[6] = t3[6] >> 1 | t3[7] << 31;
+      t3[7] = t3[7] >> 1 | c     << 31; // the carry of the addition becomes the top bit
+    }
+    else // both t0 and t1 are odd: subtract the smaller one from the larger one and halve
+    {
+      u32 gt = 0; // gt = (t0 > t1), compared from the most significant word downwards
+
+      for (int i = 7; i >= 0; i--)
+      {
+        if (t0[i] > t1[i])
+        {
+          gt = 1;
+
+          break;
+        }
+
+        if (t0[i] < t1[i]) break;
+      }
+
+      if (gt) // t0 > t1: t0 = (t0 - t1) / 2 and t2 = (t2 - t3) / 2 (mod p)
+      {
+        sub (t0, t0, t1);
+
+        t0[0] = t0[0] >> 1 | t0[1] << 31;
+        t0[1] = t0[1] >> 1 | t0[2] << 31;
+        t0[2] = t0[2] >> 1 | t0[3] << 31;
+        t0[3] = t0[3] >> 1 | t0[4] << 31;
+        t0[4] = t0[4] >> 1 | t0[5] << 31;
+        t0[5] = t0[5] >> 1 | t0[6] << 31;
+        t0[6] = t0[6] >> 1 | t0[7] << 31;
+        t0[7] = t0[7] >> 1;
+
+        u32 lt = 0; // lt = (t2 < t3)
+
+        for (int i = 7; i >= 0; i--)
+        {
+          if (t2[i] < t3[i])
+          {
+            lt = 1;
+
+            break;
+          }
+
+          if (t2[i] > t3[i]) break;
+        }
+
+        if (lt) add (t2, t2, p); // add p first so that t2 - t3 is not negative
+
+        sub (t2, t2, t3);
+
+        u32 c = 0;
+
+        if (t2[0] & 1) c = add (t2, t2, p); // odd: add p to make it even, c keeps the carry
+
+        t2[0] = t2[0] >> 1 | t2[1] << 31;
+        t2[1] = t2[1] >> 1 | t2[2] << 31;
+        t2[2] = t2[2] >> 1 | t2[3] << 31;
+        t2[3] = t2[3] >> 1 | t2[4] << 31;
+        t2[4] = t2[4] >> 1 | t2[5] << 31;
+        t2[5] = t2[5] >> 1 | t2[6] << 31;
+        t2[6] = t2[6] >> 1 | t2[7] << 31;
+        t2[7] = t2[7] >> 1 | c     << 31;
+      }
+      else // t0 < t1 (they can't be equal inside the loop): t1 = (t1 - t0) / 2, t3 = (t3 - t2) / 2
+      {
+        sub (t1, t1, t0);
+
+        t1[0] = t1[0] >> 1 | t1[1] << 31;
+        t1[1] = t1[1] >> 1 | t1[2] << 31;
+        t1[2] = t1[2] >> 1 | t1[3] << 31;
+        t1[3] = t1[3] >> 1 | t1[4] << 31;
+        t1[4] = t1[4] >> 1 | t1[5] << 31;
+        t1[5] = t1[5] >> 1 | t1[6] << 31;
+        t1[6] = t1[6] >> 1 | t1[7] << 31;
+        t1[7] = t1[7] >> 1;
+
+        u32 lt = 0; // lt = (t3 < t2)
+
+        for (int i = 7; i >= 0; i--)
+        {
+          if (t3[i] < t2[i])
+          {
+            lt = 1;
+
+            break;
+          }
+
+          if (t3[i] > t2[i]) break;
+        }
+
+        if (lt) add (t3, t3, p); // add p first so that t3 - t2 is not negative
+
+        sub (t3, t3, t2);
+
+        u32 c = 0;
+
+        if (t3[0] & 1) c = add (t3, t3, p); // odd: add p to make it even, c keeps the carry
+
+        t3[0] = t3[0] >> 1 | t3[1] << 31;
+        t3[1] = t3[1] >> 1 | t3[2] << 31;
+        t3[2] = t3[2] >> 1 | t3[3] << 31;
+        t3[3] = t3[3] >> 1 | t3[4] << 31;
+        t3[4] = t3[4] >> 1 | t3[5] << 31;
+        t3[5] = t3[5] >> 1 | t3[6] << 31;
+        t3[6] = t3[6] >> 1 | t3[7] << 31;
+        t3[7] = t3[7] >> 1 | c     << 31;
+      }
+    }
+
+    // update b:
+
+    b = (t0[0] != t1[0])
+      | (t0[1] != t1[1])
+      | (t0[2] != t1[2])
+      | (t0[3] != t1[3])
+      | (t0[4] != t1[4])
+      | (t0[5] != t1[5])
+      | (t0[6] != t1[6])
+      | (t0[7] != t1[7]);
+  }
+
+  // set result:
+
+  a[0] = t2[0];
+  a[1] = t2[1];
+  a[2] = t2[2];
+  a[3] = t2[3];
+  a[4] = t2[4];
+  a[5] = t2[5];
+  a[6] = t2[6];
+  a[7] = t2[7];
+}
+
+/*
+  point_double () doubles the point (x, y, z) in place: x, y and z (8 words each, least
+  significant word first) are overwritten with the coordinates X, Y and Z given below.
+  The y == 0 case is not treated separately (the guard inside the function is commented out).
+
+  // everything from the formulas below of course MOD the prime:
+
+  // we use this formula:
+
+  X = (3/2 * x^2)^2 - 2 * x * y^2
+  Y = (3/2 * x^2) * (x * y^2 - X) - y^4
+  Z = y * z
+
+  this is identical to the more frequently used form:
+
+  X = (3 * x^2)^2 - 8 * x * y^2
+  Y =  3 * x^2 * (4 * x * y^2 - X) - 8 * y^4
+  Z =  2 * y * z
+*/
+
+DECLSPEC void point_double (u32 x[8], u32 y[8], u32 z[8])
+{
+  // How often does this really happen? it should "almost" never happen (but would be safer)
+
+  /*
+  if ((y[0] | y[1] | y[2] | y[3] | y[4] | y[5] | y[6] | y[7]) == 0)
+  {
+    x[0] = 0;
+    x[1] = 0;
+    x[2] = 0;
+    x[3] = 0;
+    x[4] = 0;
+    x[5] = 0;
+    x[6] = 0;
+    x[7] = 0;
+
+    y[0] = 0;
+    y[1] = 0;
+    y[2] = 0;
+    y[3] = 0;
+    y[4] = 0;
+    y[5] = 0;
+    y[6] = 0;
+    y[7] = 0;
+
+    z[0] = 0;
+    z[1] = 0;
+    z[2] = 0;
+    z[3] = 0;
+    z[4] = 0;
+    z[5] = 0;
+    z[6] = 0;
+    z[7] = 0;
+
+    return;
+  }
+  */
+
+  u32 t1[8]; // t1 = x
+
+  t1[0] = x[0];
+  t1[1] = x[1];
+  t1[2] = x[2];
+  t1[3] = x[3];
+  t1[4] = x[4];
+  t1[5] = x[5];
+  t1[6] = x[6];
+  t1[7] = x[7];
+
+  u32 t2[8]; // t2 = y
+
+  t2[0] = y[0];
+  t2[1] = y[1];
+  t2[2] = y[2];
+  t2[3] = y[3];
+  t2[4] = y[4];
+  t2[5] = y[5];
+  t2[6] = y[6];
+  t2[7] = y[7];
+
+  u32 t3[8]; // t3 = z
+
+  t3[0] = z[0];
+  t3[1] = z[1];
+  t3[2] = z[2];
+  t3[3] = z[3];
+  t3[4] = z[4];
+  t3[5] = z[5];
+  t3[6] = z[6];
+  t3[7] = z[7];
+
+  u32 t4[8];
+  u32 t5[8];
+  u32 t6[8];
+
+  mul_mod (t4, t1, t1); // t4 = x^2
+
+  mul_mod (t5, t2, t2); // t5 = y^2
+
+  mul_mod (t1, t1, t5); // t1 = x*y^2
+
+  mul_mod (t5, t5, t5); // t5 = t5^2 = y^4
+
+  // here the z^2 and z^4 is not needed for a = 0
+
+  mul_mod (t3, t2, t3); // t3 = y * z = Z
+
+  add_mod (t2, t4, t4); // t2 = 2 * t4 = 2 * x^2
+  add_mod (t4, t4, t2); // t4 = 3 * t4 = 3 * x^2
+
+  // a * z^4 = 0 * 1^4 = 0
+
+  // don't discard the least significant bit it's important too!
+
+  u32 c = 0;
+
+  if (t4[0] & 1) // odd: add the (odd) prime first, so that the division by 2 below is exact
+  {
+    u32 t[8];
+
+    t[0] = SECP256K1_P0;
+    t[1] = SECP256K1_P1;
+    t[2] = SECP256K1_P2;
+    t[3] = SECP256K1_P3;
+    t[4] = SECP256K1_P4;
+    t[5] = SECP256K1_P5;
+    t[6] = SECP256K1_P6;
+    t[7] = SECP256K1_P7;
+
+    c = add (t4, t4, t); // t4 + SECP256K1_P
+  }
+
+  // right shift (t4 / 2):
+
+  t4[0] = t4[0] >> 1 | t4[1] << 31;
+  t4[1] = t4[1] >> 1 | t4[2] << 31;
+  t4[2] = t4[2] >> 1 | t4[3] << 31;
+  t4[3] = t4[3] >> 1 | t4[4] << 31;
+  t4[4] = t4[4] >> 1 | t4[5] << 31;
+  t4[5] = t4[5] >> 1 | t4[6] << 31;
+  t4[6] = t4[6] >> 1 | t4[7] << 31;
+  t4[7] = t4[7] >> 1 | c     << 31; // the carry of the addition becomes the top bit
+
+  mul_mod (t6, t4, t4); // t6 = t4^2 = (3/2 * x^2)^2
+
+  add_mod (t2, t1, t1); // t2 = 2 * t1 = 2 * x * y^2
+
+  sub_mod (t6, t6, t2); // t6 = t6 - t2 = (3/2 * x^2)^2 - 2 * x * y^2 = X
+  sub_mod (t1, t1, t6); // t1 = t1 - t6 = x * y^2 - X
+
+  mul_mod (t4, t4, t1); // t4 = t4 * t1 = (3/2 * x^2) * (x * y^2 - X)
+
+  sub_mod (t1, t4, t5); // t1 = t4 - t5 = t4 - y^4 = Y
+
+  // => x = t6, y = t1, z = t3:
+
+  x[0] = t6[0];
+  x[1] = t6[1];
+  x[2] = t6[2];
+  x[3] = t6[3];
+  x[4] = t6[4];
+  x[5] = t6[5];
+  x[6] = t6[6];
+  x[7] = t6[7];
+
+  y[0] = t1[0];
+  y[1] = t1[1];
+  y[2] = t1[2];
+  y[3] = t1[3];
+  y[4] = t1[4];
+  y[5] = t1[5];
+  y[6] = t1[6];
+  y[7] = t1[7];
+
+  z[0] = t3[0];
+  z[1] = t3[1];
+  z[2] = t3[2];
+  z[3] = t3[3];
+  z[4] = t3[4];
+  z[5] = t3[5];
+  z[6] = t3[6];
+  z[7] = t3[7];
+}
+
+DECLSPEC void point_add (u32 x1[8], u32 y1[8], u32 z1[8], const u32 x2[8], const u32 y2[8], const u32 z2[8])
+{
+  /*
+   * Adds the point (x2, y2, z2) to the point (x1, y1, z1): the sum is written back to x1, y1 and
+   * z1, the second point is only read. All values are 8 words, least significant word first,
+   * and everything is computed MOD the prime (mul_mod / add_mod / sub_mod).
+   *
+   * The trailing comments below use these names:
+   *
+   *   A = x1 * z2^2, B = x2 * z1^2, C = y1 * z2^3, D = y2 * z1^3, E = A - B, F = C - D
+   *
+   *   X3 = F^2 - E^3 - 2 * B * E^2
+   *   Y3 = F * (B * E^2 - X3) - D * E^3
+   *   Z3 = z1 * z2 * E
+   *
+   * No special case is handled here (the guard below is commented out):
+   * How often does this really happen? it should "almost" never happen (but would be safer)
+   */
+
+  /*
+  if ((y2[0] | y2[1] | y2[2] | y2[3] | y2[4] | y2[5] | y2[6] | y2[7]) == 0) return;
+
+  if ((y1[0] | y1[1] | y1[2] | y1[3] | y1[4] | y1[5] | y1[6] | y1[7]) == 0)
+  {
+    x1[0] = x2[0];
+    x1[1] = x2[1];
+    x1[2] = x2[2];
+    x1[3] = x2[3];
+    x1[4] = x2[4];
+    x1[5] = x2[5];
+    x1[6] = x2[6];
+    x1[7] = x2[7];
+
+    y1[0] = y2[0];
+    y1[1] = y2[1];
+    y1[2] = y2[2];
+    y1[3] = y2[3];
+    y1[4] = y2[4];
+    y1[5] = y2[5];
+    y1[6] = y2[6];
+    y1[7] = y2[7];
+
+    z1[0] = z2[0];
+    z1[1] = z2[1];
+    z1[2] = z2[2];
+    z1[3] = z2[3];
+    z1[4] = z2[4];
+    z1[5] = z2[5];
+    z1[6] = z2[6];
+    z1[7] = z2[7];
+
+    return;
+  }
+  */
+
+  // if x1 == x2 and y1 == y2 and z1 == z2 we need to double instead? (E = F = 0 => all 0 here)
+
+  // x1/y1/z1:
+
+  u32 t1[8];
+
+  t1[0] = x1[0];
+  t1[1] = x1[1];
+  t1[2] = x1[2];
+  t1[3] = x1[3];
+  t1[4] = x1[4];
+  t1[5] = x1[5];
+  t1[6] = x1[6];
+  t1[7] = x1[7];
+
+  u32 t2[8];
+
+  t2[0] = y1[0];
+  t2[1] = y1[1];
+  t2[2] = y1[2];
+  t2[3] = y1[3];
+  t2[4] = y1[4];
+  t2[5] = y1[5];
+  t2[6] = y1[6];
+  t2[7] = y1[7];
+
+  u32 t3[8];
+
+  t3[0] = z1[0];
+  t3[1] = z1[1];
+  t3[2] = z1[2];
+  t3[3] = z1[3];
+  t3[4] = z1[4];
+  t3[5] = z1[5];
+  t3[6] = z1[6];
+  t3[7] = z1[7];
+
+  // x2/y2/z2:
+
+  u32 t4[8];
+
+  t4[0] = x2[0];
+  t4[1] = x2[1];
+  t4[2] = x2[2];
+  t4[3] = x2[3];
+  t4[4] = x2[4];
+  t4[5] = x2[5];
+  t4[6] = x2[6];
+  t4[7] = x2[7];
+
+  u32 t5[8];
+
+  t5[0] = y2[0];
+  t5[1] = y2[1];
+  t5[2] = y2[2];
+  t5[3] = y2[3];
+  t5[4] = y2[4];
+  t5[5] = y2[5];
+  t5[6] = y2[6];
+  t5[7] = y2[7];
+
+  u32 t6[8];
+
+  t6[0] = z2[0];
+  t6[1] = z2[1];
+  t6[2] = z2[2];
+  t6[3] = z2[3];
+  t6[4] = z2[4];
+  t6[5] = z2[5];
+  t6[6] = z2[6];
+  t6[7] = z2[7];
+
+  u32 t7[8];
+
+  mul_mod (t7, t3, t3); // t7 = z1^2
+  mul_mod (t4, t4, t7); // t4 = x2 * z1^2 = B
+
+  mul_mod (t5, t5, t3); // t5 = y2 * z1
+  mul_mod (t5, t5, t7); // t5 = y2 * z1^3 = D
+
+  mul_mod (t7, t6, t6); // t7 = z2^2
+
+  mul_mod (t1, t1, t7); // t1 = x1 * z2^2 = A
+
+  mul_mod (t2, t2, t6); // t2 = y1 * z2
+  mul_mod (t2, t2, t7); // t2 = y1 * z2^3 = C
+
+  sub_mod (t1, t1, t4); // t1 = A - B = E
+
+  mul_mod (t3, t6, t3); // t3 = z1 * z2
+  mul_mod (t3, t1, t3); // t3 = z1 * z2 * E = Z3
+
+  sub_mod (t2, t2, t5); // t2 = C - D = F
+
+  mul_mod (t7, t1, t1); // t7 = E^2
+  mul_mod (t6, t2, t2); // t6 = F^2
+
+  mul_mod (t4, t4, t7); // t4 = B * E^2
+  mul_mod (t1, t7, t1); // t1 = E^3
+
+  sub_mod (t6, t6, t1); // t6 = F^2 - E^3
+
+  add_mod (t7, t4, t4); // t7 = 2 * B * E^2
+
+  sub_mod (t6, t6, t7); // t6 = F^2 - E^3 - 2 * B * E^2 = X3
+  sub_mod (t4, t4, t6); // t4 = B * E^2 - X3
+
+  mul_mod (t2, t2, t4); // t2 = F * (B * E^2 - X3)
+  mul_mod (t7, t5, t1); // t7 = D * E^3
+
+  sub_mod (t7, t2, t7); // t7 = F * (B * E^2 - X3) - D * E^3 = Y3
+
+  // => x1 = t6 (X3), y1 = t7 (Y3), z1 = t3 (Z3):
+
+  x1[0] = t6[0];
+  x1[1] = t6[1];
+  x1[2] = t6[2];
+  x1[3] = t6[3];
+  x1[4] = t6[4];
+  x1[5] = t6[5];
+  x1[6] = t6[6];
+  x1[7] = t6[7];
+
+  y1[0] = t7[0];
+  y1[1] = t7[1];
+  y1[2] = t7[2];
+  y1[3] = t7[3];
+  y1[4] = t7[4];
+  y1[5] = t7[5];
+  y1[6] = t7[6];
+  y1[7] = t7[7];
+
+  z1[0] = t3[0];
+  z1[1] = t3[1];
+  z1[2] = t3[2];
+  z1[3] = t3[3];
+  z1[4] = t3[4];
+  z1[5] = t3[5];
+  z1[6] = t3[6];
+  z1[7] = t3[7];
+}
+
+/**
+ * Precompute a small table of multiples of the affine point (x, y) and store it in r.
+ *
+ * r     output table:
+ *         r->xy[ 0..15]  the input x (8 words) followed by the input y (8 words), unchanged
+ *         r->xy[16..31]  affine x and y of the doubled point (2 * P)
+ *         r->xy[32..47]  affine x and y of the doubled point doubled once more (4 * P)
+ *       (this is the layout that point_mul () indexes with offsets 0, 16 and 32)
+ * x, y  affine coordinates of the input point, 8 u32 words each, index 0 being the
+ *       least significant word
+ */
+DECLSPEC void point_get_coords (secp256k1_t *r, const u32 x[8], const u32 y[8])
+{
+  // init the values with x and y:
+
+  u32 x1[8];
+
+  x1[0] = x[0];
+  x1[1] = x[1];
+  x1[2] = x[2];
+  x1[3] = x[3];
+  x1[4] = x[4];
+  x1[5] = x[5];
+  x1[6] = x[6];
+  x1[7] = x[7];
+
+  u32 y1[8];
+
+  y1[0] = y[0];
+  y1[1] = y[1];
+  y1[2] = y[2];
+  y1[3] = y[3];
+  y1[4] = y[4];
+  y1[5] = y[5];
+  y1[6] = y[6];
+  y1[7] = y[7];
+
+  // t1 keeps a copy of y: it is used below as the Z coordinate of the doubled point
+
+  u32 t1[8];
+
+  t1[0] = y[0];
+  t1[1] = y[1];
+  t1[2] = y[2];
+  t1[3] = y[3];
+  t1[4] = y[4];
+  t1[5] = y[5];
+  t1[6] = y[6];
+  t1[7] = y[7];
+
+  // we use jacobian forms and the conversion with z = 1 is basically a NO-OP:
+  // X = X1 * z^2 = X1, Y = Y1 * z^3 = Y1
+
+  // https://eprint.iacr.org/2011/338.pdf
+
+  // initial jacobian doubling
+
+  u32 t2[8];
+  u32 t3[8];
+  u32 t4[8];
+
+  mul_mod (t2, x1, x1); // t2 = x1^2
+  mul_mod (t3, y1, y1); // t3 = y1^2
+
+  mul_mod (x1, x1, t3); // x1 = x1*y1^2
+
+  mul_mod (t3, t3, t3); // t3 = t3^2 = y1^4
+
+  // here the z^2 and z^4 is not needed for a = 0 (and furthermore we have z = 1)
+
+  add_mod (y1, t2, t2); // y1 = 2 * t2 = 2 * x1^2
+  add_mod (t2, y1, t2); // t2 = 3 * t2 = 3 * x1^2
+
+  // a * z^4 = 0 * 1^4 = 0
+
+  // halve t2 modulo p: if t2 is odd, first add p so that the sum is even; the carry out of
+  // that addition (c) is the bit that gets shifted into the top of t2[7] below
+
+  u32 c = 0;
+
+  if (t2[0] & 1)
+  {
+    u32 t[8];
+
+    t[0] = SECP256K1_P0;
+    t[1] = SECP256K1_P1;
+    t[2] = SECP256K1_P2;
+    t[3] = SECP256K1_P3;
+    t[4] = SECP256K1_P4;
+    t[5] = SECP256K1_P5;
+    t[6] = SECP256K1_P6;
+    t[7] = SECP256K1_P7;
+
+    c = add (t2, t2, t); // t2 + SECP256K1_P
+  }
+
+  // right shift (t2 / 2):
+  // (done from the least significant word upwards, so each line still sees the unshifted
+  // value of the next higher word)
+
+  t2[0] = t2[0] >> 1 | t2[1] << 31;
+  t2[1] = t2[1] >> 1 | t2[2] << 31;
+  t2[2] = t2[2] >> 1 | t2[3] << 31;
+  t2[3] = t2[3] >> 1 | t2[4] << 31;
+  t2[4] = t2[4] >> 1 | t2[5] << 31;
+  t2[5] = t2[5] >> 1 | t2[6] << 31;
+  t2[6] = t2[6] >> 1 | t2[7] << 31;
+  t2[7] = t2[7] >> 1 | c     << 31;
+
+  mul_mod (t4, t2, t2); // t4 = t2^2 = (3/2*x1^2)^2
+
+  add_mod (y1, x1, x1); // y1 = 2 * x1_new
+
+  sub_mod (t4, t4, y1); // t4 = t4 - y1_new
+  sub_mod (x1, x1, t4); // x1 = x1 - t4
+
+  mul_mod (t2, t2, x1); // t2 = t2 * x1_new
+
+  sub_mod (x1, t2, t3); // x1 = t2 - t3
+
+  // => X = t4, Y = x1, Z = t1:
+  // (and t2, t3 can now be safely reused)
+
+  // convert to affine coordinates (to save some bytes copied around) and store it:
+  // (t1 must stay untouched because point_double () below still needs Z, hence the copy)
+
+  u32 inv[8];
+
+  inv[0] = t1[0];
+  inv[1] = t1[1];
+  inv[2] = t1[2];
+  inv[3] = t1[3];
+  inv[4] = t1[4];
+  inv[5] = t1[5];
+  inv[6] = t1[6];
+  inv[7] = t1[7];
+
+  inv_mod (inv);
+
+  mul_mod (t2, inv, inv); // t2 = inv^2
+  mul_mod (t3, inv, t2);  // t3 = inv^3
+
+  // affine y of the doubled point: Y * inv^3
+
+  mul_mod (t3, t3, x1);
+
+  r->xy[31] = t3[7];
+  r->xy[30] = t3[6];
+  r->xy[29] = t3[5];
+  r->xy[28] = t3[4];
+  r->xy[27] = t3[3];
+  r->xy[26] = t3[2];
+  r->xy[25] = t3[1];
+  r->xy[24] = t3[0];
+
+  // affine x of the doubled point: X * inv^2
+
+  mul_mod (t3, t2, t4);
+
+  r->xy[23] = t3[7];
+  r->xy[22] = t3[6];
+  r->xy[21] = t3[5];
+  r->xy[20] = t3[4];
+  r->xy[19] = t3[3];
+  r->xy[18] = t3[2];
+  r->xy[17] = t3[1];
+  r->xy[16] = t3[0];
+
+  // also store original x/y:
+
+  r->xy[15] = y[7];
+  r->xy[14] = y[6];
+  r->xy[13] = y[5];
+  r->xy[12] = y[4];
+  r->xy[11] = y[3];
+  r->xy[10] = y[2];
+  r->xy[ 9] = y[1];
+  r->xy[ 8] = y[0];
+
+  r->xy[ 7] = x[7];
+  r->xy[ 6] = x[6];
+  r->xy[ 5] = x[5];
+  r->xy[ 4] = x[4];
+  r->xy[ 3] = x[3];
+  r->xy[ 2] = x[2];
+  r->xy[ 1] = x[1];
+  r->xy[ 0] = x[0];
+
+
+  // do the double of the double (i.e. 4 * P) too, just in case we need it in the main loop:
+  // (point_double () works in place on X = t4, Y = x1, Z = t1)
+
+  point_double (t4, x1, t1);
+
+  // convert to affine coordinates and store it:
+  // (t1 is not needed afterwards, so it is inverted in place this time)
+
+  inv_mod (t1);
+
+  mul_mod (t2, t1, t1); // t2 = t1^2
+  mul_mod (t3, t1, t2); // t3 = t1^3
+
+  // affine y of 4 * P: Y * t1^3
+
+  mul_mod (t3, t3, x1);
+
+  r->xy[47] = t3[7];
+  r->xy[46] = t3[6];
+  r->xy[45] = t3[5];
+  r->xy[44] = t3[4];
+  r->xy[43] = t3[3];
+  r->xy[42] = t3[2];
+  r->xy[41] = t3[1];
+  r->xy[40] = t3[0];
+
+  // affine x of 4 * P: X * t1^2
+
+  mul_mod (t3, t2, t4);
+
+  r->xy[39] = t3[7];
+  r->xy[38] = t3[6];
+  r->xy[37] = t3[5];
+  r->xy[36] = t3[4];
+  r->xy[35] = t3[3];
+  r->xy[34] = t3[2];
+  r->xy[33] = t3[1];
+  r->xy[32] = t3[0];
+}
+
+/**
+ * Multiply the precomputed point P by the scalar k and return the compressed result.
+ *
+ * r     output, 9 u32 words: the top byte of r[0] is the prefix 0x02 (even y) or 0x03 (odd y),
+ *       followed by the 32 bytes of the affine x coordinate from most to least significant;
+ *       only the top byte of r[8] is used
+ * k     scalar, 8 u32 words, k[0] holding the least significant 32 bits; must not be zero
+ *       (the point at infinity cannot be represented here)
+ * tmps  table laid out as point_get_coords () writes it: xy[0..15] = P, xy[16..31] = 2 * P,
+ *       xy[32..47] = 4 * P (affine x followed by affine y, 8 words each)
+ *
+ * The two lowest bits of k select the table entries to start from, so that the first one or
+ * two doublings come for free. The rest is a right-to-left binary double-and-add.
+ */
+DECLSPEC void point_mul (u32 r[9], const u32 k[8], GLOBAL_AS const secp256k1_t *tmps)
+{
+  // first check the position of the least significant bit
+
+  // the following fancy shift operation just checks the last 2 bits, finds the
+  // least significant bit (set to 1) and updates idx according to this table:
+  // last bits  | idx
+  // 0bxxxxxx00 | 2
+  // 0bxxxxxx01 | 0
+  // 0bxxxxxx10 | 1
+  // 0bxxxxxx11 | 0
+
+  const u32 idx = (0x0102 >> ((k[0] & 3) << 2)) & 3;
+
+  const u32 offset = idx << 4; // * (8 + 8) = 16 (=> offset of 16 u32 = 16 * 4 bytes)
+
+  // x1/y1/z1 is the accumulator (the running sum), starting at P, 2 * P or 4 * P:
+
+  u32 x1[8];
+
+  x1[0] = tmps->xy[offset +  0];
+  x1[1] = tmps->xy[offset +  1];
+  x1[2] = tmps->xy[offset +  2];
+  x1[3] = tmps->xy[offset +  3];
+  x1[4] = tmps->xy[offset +  4];
+  x1[5] = tmps->xy[offset +  5];
+  x1[6] = tmps->xy[offset +  6];
+  x1[7] = tmps->xy[offset +  7];
+
+  u32 y1[8];
+
+  y1[0] = tmps->xy[offset +  8];
+  y1[1] = tmps->xy[offset +  9];
+  y1[2] = tmps->xy[offset + 10];
+  y1[3] = tmps->xy[offset + 11];
+  y1[4] = tmps->xy[offset + 12];
+  y1[5] = tmps->xy[offset + 13];
+  y1[6] = tmps->xy[offset + 14];
+  y1[7] = tmps->xy[offset + 15];
+
+  // the table holds affine points, i.e. z = 1:
+
+  u32 z1[8] = { 0 };
+
+  z1[0] = 1;
+
+  // x2/y2/z2 is the running power-of-two multiple of P, starting one table entry after the
+  // accumulator (2 * P or 4 * P)
+
+  // do NOT allow to overflow the tmps->xy buffer:
+  // (for idx == 2 there is no entry after 4 * P, so start from 4 * P itself)
+
+  u32 final_offset = offset;
+
+  if (final_offset > 16) final_offset = 16;
+
+  u32 x2[8];
+
+  x2[0] = tmps->xy[final_offset + 16];
+  x2[1] = tmps->xy[final_offset + 17];
+  x2[2] = tmps->xy[final_offset + 18];
+  x2[3] = tmps->xy[final_offset + 19];
+  x2[4] = tmps->xy[final_offset + 20];
+  x2[5] = tmps->xy[final_offset + 21];
+  x2[6] = tmps->xy[final_offset + 22];
+  x2[7] = tmps->xy[final_offset + 23];
+
+  u32 y2[8];
+
+  y2[0] = tmps->xy[final_offset + 24];
+  y2[1] = tmps->xy[final_offset + 25];
+  y2[2] = tmps->xy[final_offset + 26];
+  y2[3] = tmps->xy[final_offset + 27];
+  y2[4] = tmps->xy[final_offset + 28];
+  y2[5] = tmps->xy[final_offset + 29];
+  y2[6] = tmps->xy[final_offset + 30];
+  y2[7] = tmps->xy[final_offset + 31];
+
+  u32 z2[8] = { 0 };
+
+  z2[0] = 1;
+
+  // ... then find out the position of the most significant bit
+  // (loop_end ends up as the index of the highest set bit, or 0 if no bit above bit 0 is set)
+
+  int loop_start = idx;
+  int loop_end   = 255;
+
+  for (int i = 255; i > 0; i--) // or use: i > idx
+  {
+    const u32 word = i >> 5; // the current u32 (each consisting of 2^5 = 32 bits) to inspect
+
+    const u32 mask = 1u << (i & 0x1f); // unsigned literal: bit 31 must be reachable
+
+    if (k[word] & mask) break; // found it !
+
+    loop_end--;
+  }
+
+  /*
+   * Start
+   */
+
+  // "just" double until we find the first add (where the first bit is set):
+  // (afterwards loop_start is the index of the lowest set bit, capped at loop_end)
+
+  for (int pos = loop_start; pos < loop_end; pos++)
+  {
+    const u32 word = pos >> 5;
+
+    const u32 mask = 1u << (pos & 0x1f);
+
+    if (k[word] & mask) break;
+
+    point_double (x2, y2, z2);
+
+    loop_start++;
+  }
+
+  // for case 0 and 1 we can skip the double (the table already contains the doubled point),
+  // for case 2 the accumulator starts at the current power of two and x2 moves on by one:
+
+  if (idx > 1)
+  {
+    x1[0] = x2[0];
+    x1[1] = x2[1];
+    x1[2] = x2[2];
+    x1[3] = x2[3];
+    x1[4] = x2[4];
+    x1[5] = x2[5];
+    x1[6] = x2[6];
+    x1[7] = x2[7];
+
+    y1[0] = y2[0];
+    y1[1] = y2[1];
+    y1[2] = y2[2];
+    y1[3] = y2[3];
+    y1[4] = y2[4];
+    y1[5] = y2[5];
+    y1[6] = y2[6];
+    y1[7] = y2[7];
+
+    z1[0] = z2[0];
+    z1[1] = z2[1];
+    z1[2] = z2[2];
+    z1[3] = z2[3];
+    z1[4] = z2[4];
+    z1[5] = z2[5];
+    z1[6] = z2[6];
+    z1[7] = z2[7];
+
+    point_double (x2, y2, z2);
+  }
+
+  // main loop (right-to-left binary algorithm):
+
+  for (int pos = loop_start + 1; pos < loop_end; pos++)
+  {
+    const u32 word = pos >> 5;
+
+    const u32 mask = 1u << (pos & 0x1f);
+
+    // add only if needed:
+
+    if (k[word] & mask)
+    {
+      point_add (x1, y1, z1, x2, y2, z2);
+    }
+
+    // always double:
+
+    point_double (x2, y2, z2);
+  }
+
+  // handle last one: bit loop_end is the most significant bit and therefore always set,
+  // no need to test it
+  //
+  // if k has only a single bit set, the lowest and the highest set bit coincide
+  // (loop_start == loop_end) and x1/y1/z1 already hold the result: adding x2/y2/z2 on top
+  // would be wrong, so only add if there really is a second, higher bit
+
+  if (loop_start < loop_end)
+  {
+    point_add (x1, y1, z1, x2, y2, z2);
+  }
+
+  /*
+   * Get the corresponding affine coordinates x/y:
+   *
+   * Note:
+   * x1_affine = x1_jacobian / z1^2 = x1_jacobian * z1_inv^2
+   * y1_affine = y1_jacobian / z1^3 = y1_jacobian * z1_inv^3
+   *
+   */
+
+  inv_mod (z1);
+
+  // z2 is just used as temporary storage to keep the unmodified z1 for calculating z1^3:
+
+  mul_mod (z2, z1, z1); // z1^2
+  mul_mod (x1, x1, z2); // x1_affine
+
+  mul_mod (z1, z2, z1); // z1^3
+  mul_mod (y1, y1, z1); // y1_affine
+
+  /*
+   * output:
+   */
+
+  // shift by 1 byte (8 bits) to make room and add the parity/sign (for odd/even y):
+
+  r[8] =                (x1[0] << 24);
+  r[7] = (x1[0] >> 8) | (x1[1] << 24);
+  r[6] = (x1[1] >> 8) | (x1[2] << 24);
+  r[5] = (x1[2] >> 8) | (x1[3] << 24);
+  r[4] = (x1[3] >> 8) | (x1[4] << 24);
+  r[3] = (x1[4] >> 8) | (x1[5] << 24);
+  r[2] = (x1[5] >> 8) | (x1[6] << 24);
+  r[1] = (x1[6] >> 8) | (x1[7] << 24);
+  r[0] = (x1[7] >> 8);
+
+  const u32 type = 0x02 | (y1[0] & 1); // (note: 0b10 | 0b01 = 0x03)
+
+  r[0] = r[0] | type << 24; // 0x02 or 0x03
+}
+
+/**
+ * Decode a compressed secp256k1 public key and precompute the point table for it.
+ *
+ * r  output table, filled by point_get_coords () on success (untouched on failure)
+ * k  the 33 byte compressed key in 9 u32 words: the lowest byte of k[0] is the prefix
+ *    (0x02 for even y, 0x03 for odd y), the 32 bytes of x follow, most significant byte first
+ *
+ * Returns 0 on success, 1 if the prefix is neither 0x02 nor 0x03 or if x is not smaller than p.
+ *
+ * NOTE(review): nothing here verifies that x^3 + 7 is a square modulo p, i.e. that x really
+ * belongs to a point on the curve - presumably sqrt_mod () cannot report that; confirm
+ * against its definition before relying on this function to reject invalid keys.
+ */
+DECLSPEC u32 parse_public (secp256k1_t *r, const u32 k[9])
+{
+  // verify:
+
+  const u32 first_byte = k[0] & 0xff;
+
+  if ((first_byte != '\x02') && (first_byte != '\x03'))
+  {
+    return 1;
+  }
+
+  // load k into x without the first byte:
+  // (each x word is assembled from the upper 3 bytes of one k word and the lowest byte of the
+  // next one, byte-swapped so that x[0] ends up as the least significant word)
+
+  u32 x[8];
+
+  x[0] = (k[7] & 0xff00) << 16 | (k[7] & 0xff0000) | (k[7] & 0xff000000) >> 16 | (k[8] & 0xff);
+  x[1] = (k[6] & 0xff00) << 16 | (k[6] & 0xff0000) | (k[6] & 0xff000000) >> 16 | (k[7] & 0xff);
+  x[2] = (k[5] & 0xff00) << 16 | (k[5] & 0xff0000) | (k[5] & 0xff000000) >> 16 | (k[6] & 0xff);
+  x[3] = (k[4] & 0xff00) << 16 | (k[4] & 0xff0000) | (k[4] & 0xff000000) >> 16 | (k[5] & 0xff);
+  x[4] = (k[3] & 0xff00) << 16 | (k[3] & 0xff0000) | (k[3] & 0xff000000) >> 16 | (k[4] & 0xff);
+  x[5] = (k[2] & 0xff00) << 16 | (k[2] & 0xff0000) | (k[2] & 0xff000000) >> 16 | (k[3] & 0xff);
+  x[6] = (k[1] & 0xff00) << 16 | (k[1] & 0xff0000) | (k[1] & 0xff000000) >> 16 | (k[2] & 0xff);
+  x[7] = (k[0] & 0xff00) << 16 | (k[0] & 0xff0000) | (k[0] & 0xff000000) >> 16 | (k[1] & 0xff);
+
+  u32 p[8];
+
+  p[0] = SECP256K1_P0;
+  p[1] = SECP256K1_P1;
+  p[2] = SECP256K1_P2;
+  p[3] = SECP256K1_P3;
+  p[4] = SECP256K1_P4;
+  p[5] = SECP256K1_P5;
+  p[6] = SECP256K1_P6;
+  p[7] = SECP256K1_P7;
+
+  // x must be smaller than p (because of y ^ 2 = x ^ 3 % p):
+  // compare word by word from the most significant one down, the first differing word decides
+
+  int i;
+
+  for (i = 7; i >= 0; i--)
+  {
+    if (x[i] < p[i]) break;
+    if (x[i] > p[i]) return 1;
+  }
+
+  // no differing word at all means x == p, which is out of range as well:
+
+  if (i < 0) return 1;
+
+  // get y^2 = x^3 + 7:
+
+  u32 b[8] = { 0 };
+
+  b[0] = SECP256K1_B;
+
+  u32 y[8];
+
+  mul_mod (y, x, x);
+  mul_mod (y, y, x);
+  add_mod (y, y, b);
+
+  // get y = sqrt (y^2):
+
+  sqrt_mod (y);
+
+  // check if it's of the correct parity that we want (odd/even):
+
+  if ((first_byte & 1) != (y[0] & 1))
+  {
+    // y2 = p - y1 (or y2 = y1 * -1)
+
+    sub_mod (y, p, y);
+  }
+
+  // get xy:
+
+  point_get_coords (r, x, y);
+
+  return 0;
+}
diff --git a/OpenCL/inc_ecc_secp256k1.h b/OpenCL/inc_ecc_secp256k1.h
new file mode 100644
index 000000000..501235d4b
--- /dev/null
+++ b/OpenCL/inc_ecc_secp256k1.h
@@ -0,0 +1,40 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#ifndef _INC_ECC_SECP256K1_H
+#define _INC_ECC_SECP256K1_H
+
+// y^2 = x^3 + ax + b with a = 0 and b = 7 => y^2 = x^3 + 7:
+
+#define SECP256K1_B 7
+
+// the field prime p, split into 8 u32 words (SECP256K1_P0 is the least significant word):
+
+#define SECP256K1_P0 0xfffffc2f
+#define SECP256K1_P1 0xfffffffe
+#define SECP256K1_P2 0xffffffff
+#define SECP256K1_P3 0xffffffff
+#define SECP256K1_P4 0xffffffff
+#define SECP256K1_P5 0xffffffff
+#define SECP256K1_P6 0xffffffff
+#define SECP256K1_P7 0xffffffff
+
+// the group order n, split into 8 u32 words (SECP256K1_N0 is the least significant word):
+
+#define SECP256K1_N0 0xd0364141
+#define SECP256K1_N1 0xbfd25e8c
+#define SECP256K1_N2 0xaf48a03b
+#define SECP256K1_N3 0xbaaedce6
+#define SECP256K1_N4 0xfffffffe
+#define SECP256K1_N5 0xffffffff
+#define SECP256K1_N6 0xffffffff
+#define SECP256K1_N7 0xffffffff
+
+// table of precomputed affine points for one public key P
+
+typedef struct secp256k1
+{
+  // 3 pairs of 32 + 32 bytes (8 + 8 u32 words, least significant word first):
+  // xy[ 0..15] = x,y of P, xy[16..31] = x,y of 2 * P, xy[32..47] = x,y of 4 * P
+  u32 xy[48];
+
+} secp256k1_t;
+
+// parse_public (): decodes the 33 byte compressed public key k and fills the table r,
+//                  returns 0 on success and 1 if k is rejected
+// point_mul ():    multiplies the point stored in tmps by the scalar k and writes the
+//                  compressed result (1 prefix byte + 32 bytes of x) to the 9 words of r
+
+DECLSPEC u32  parse_public (secp256k1_t *r, const u32 k[9]);
+DECLSPEC void point_mul    (u32         *r, const u32 k[8], GLOBAL_AS const secp256k1_t *tmps);
+
+#endif // _INC_ECC_SECP256K1_H
diff --git a/OpenCL/inc_zip_inflate.cl b/OpenCL/inc_zip_inflate.cl
index d43d6bc10..b980e4674 100644
--- a/OpenCL/inc_zip_inflate.cl
+++ b/OpenCL/inc_zip_inflate.cl
@@ -209,6 +209,15 @@ DECLSPEC void *memset(u8 *s, int c, u32 len){
 #define TINFL_MEMSET(p, c, l) memset(p, c, (u32)l)
 #define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj))
 
+// hashcat-patched/hashcat-specific:
+#ifdef CRC32_IN_INFLATE
+#define M_DICT_SIZE 1
+#define MAYBE_GLOBAL GLOBAL_AS
+#else
+#define M_DICT_SIZE TINFL_LZ_DICT_SIZE
+#define MAYBE_GLOBAL
+#endif
+
 #define TINFL_CR_FINISH }
 #define TINFL_CR_BEGIN  \
     switch (r->m_state) \
@@ -411,14 +420,16 @@ typedef struct
     tinfl_decompressor m_decomp;
     mz_uint m_dict_ofs, m_dict_avail, m_first_call, m_has_flushed;
     int m_window_bits;
-    mz_uint8 m_dict[1]; // hashcat-patched: we do not need m_dict because we have our own output buffer
+    // hashcat-patched: we do not need m_dict in case of CRC32 checksums,
+    // because we have our own output buffer:
+    mz_uint8 m_dict[M_DICT_SIZE];
     tinfl_status m_last_status;
 
 } inflate_state;
 
 typedef struct mz_stream_s
 {
-    GLOBAL_AS const unsigned char *next_in; /* pointer to next byte to read */
+    MAYBE_GLOBAL const unsigned char *next_in; /* pointer to next byte to read */
     unsigned int avail_in;        /* number of bytes available at next_in */
     mz_ulong total_in;            /* total number of bytes consumed so far */
 
@@ -457,9 +468,10 @@ DECLSPEC int mz_inflateEnd(mz_streamp pStream);
 
 DECLSPEC int mz_inflateInit2(mz_streamp pStream, int window_bits, inflate_state*);
 
-
+// hashcat-patched/hashcat-specific:
 DECLSPEC const mz_uint8 pIn_xor_byte (const mz_uint8 c, mz_streamp pStream)
 {
+  #ifdef CRC32_IN_INFLATE
   mz_uint8 r = c;
 
   u32 key3;
@@ -469,18 +481,21 @@ DECLSPEC const mz_uint8 pIn_xor_byte (const mz_uint8 c, mz_streamp pStream)
   update_key012 (pStream->key0, pStream->key1, pStream->key2, plain, pStream->crc32tab);
 
   return (mz_uint8) plain;
+  #else
+  return c;
+  #endif
 }
 
 
-DECLSPEC void memcpy_g(void *dest, GLOBAL_AS const void *src, size_t n, mz_streamp pStream){
-  GLOBAL_AS char *csrc = (GLOBAL_AS char *)src;
+DECLSPEC void memcpy_g(void *dest, MAYBE_GLOBAL const void *src, size_t n, mz_streamp pStream){
+  MAYBE_GLOBAL char *csrc = (MAYBE_GLOBAL char *)src;
   char *cdest = (char *)dest;
   for (int i=0; i<n; i++){
     cdest[i] = pIn_xor_byte (csrc[i], pStream);
   }
 }
 
-DECLSPEC tinfl_status tinfl_decompress(tinfl_decompressor *r, GLOBAL_AS const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags, mz_streamp pStream)
+DECLSPEC tinfl_status tinfl_decompress(tinfl_decompressor *r, MAYBE_GLOBAL const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags, mz_streamp pStream)
 {
 
     const int s_length_base[31] = { 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0 };
@@ -493,8 +508,8 @@ DECLSPEC tinfl_status tinfl_decompress(tinfl_decompressor *r, GLOBAL_AS const mz
     tinfl_status status = TINFL_STATUS_FAILED;
     mz_uint32 num_bits, dist, counter, num_extra;
     tinfl_bit_buf_t bit_buf;
-    GLOBAL_AS const mz_uint8 *pIn_buf_cur = pIn_buf_next;
-    GLOBAL_AS const mz_uint8 *pIn_buf_end = pIn_buf_next + *pIn_buf_size;
+    MAYBE_GLOBAL const mz_uint8 *pIn_buf_cur = pIn_buf_next;
+    MAYBE_GLOBAL const mz_uint8 *pIn_buf_end = pIn_buf_next + *pIn_buf_size;
     mz_uint8 *pOut_buf_cur = pOut_buf_next, *const pOut_buf_end = pOut_buf_next + *pOut_buf_size;
     size_t out_buf_size_mask = (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF) ? (size_t)-1 : ((pOut_buf_next - pOut_buf_start) + *pOut_buf_size) - 1, dist_from_out_buf_start;
 
@@ -1008,10 +1023,12 @@ DECLSPEC int mz_inflate(mz_streamp pStream, int flush)
         out_bytes = pStream->avail_out;
         status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pStream->next_out, pStream->next_out, &out_bytes, decomp_flags, pStream);
 
+        #ifdef CRC32_IN_INFLATE
         for (int i = 0; i < out_bytes; i++)
         {
           pStream->crc32 = CRC32 (pStream->crc32, pStream->next_out[i], pStream->crc32tab);
         }
+        #endif
 
         pState->m_last_status = status;
         pStream->next_in += (mz_uint)in_bytes;
@@ -1040,10 +1057,12 @@ DECLSPEC int mz_inflate(mz_streamp pStream, int flush)
         n = MZ_MIN(pState->m_dict_avail, pStream->avail_out);
         memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n);
 
+        #ifdef CRC32_IN_INFLATE
         for (int i = 0; i < n; i++)
         {
           pStream->crc32 = CRC32 (pStream->crc32, pStream->next_out[i], pStream->crc32tab);
         }
+        #endif
 
         //pStream->next_out += n;
         //pStream->avail_out -= n;
@@ -1072,10 +1091,12 @@ DECLSPEC int mz_inflate(mz_streamp pStream, int flush)
         n = MZ_MIN(pState->m_dict_avail, pStream->avail_out);
         memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n);
 
+        #ifdef CRC32_IN_INFLATE
         for (int i = 0; i < n; i++)
         {
           pStream->crc32 = CRC32 (pStream->crc32, pStream->next_out[i], pStream->crc32tab);
         }
+        #endif
 
         //pStream->next_out += n;
         //pStream->avail_out -= n;
@@ -1158,10 +1179,12 @@ DECLSPEC int hc_inflate (mz_streamp pStream)
 
   tinfl_status status = tinfl_decompress (&pState->m_decomp, pStream->next_in, &in_bytes, pStream->next_out, pStream->next_out + pStream->total_out, &out_bytes, decomp_flags, pStream);
 
+  #ifdef CRC32_IN_INFLATE
   for (int i = 0; i < out_bytes; i++)
   {
     pStream->crc32 = CRC32 (pStream->crc32, pStream->next_out[pStream->total_out + i], pStream->crc32tab);
   }
+  #endif
 
   pStream->next_in  += (mz_uint) in_bytes;
   pStream->avail_in -= (mz_uint) in_bytes;
diff --git a/OpenCL/m21700-pure.cl b/OpenCL/m21700-pure.cl
index 03b899701..45bd04ee0 100644
--- a/OpenCL/m21700-pure.cl
+++ b/OpenCL/m21700-pure.cl
@@ -13,12 +13,15 @@
 #include "inc_simd.cl"
 #include "inc_hash_sha256.cl"
 #include "inc_hash_sha512.cl"
+#include "inc_ecc_secp256k1.cl"
 #endif
 
 #define COMPARE_M "inc_comp_multi.cl"
 
 typedef struct electrum
 {
+  secp256k1_t coords;
+
   u32 data_buf[4096];
   u32 data_len;
 
@@ -34,16 +37,6 @@ typedef struct electrum_tmp
 
 } electrum_tmp_t;
 
-typedef struct
-{
-  u32 ukey[8];
-
-  u32 pubkey[9]; // 32 + 1 bytes (for sign of the curve point)
-
-  u32 hook_success;
-
-} electrum_hook_t;
-
 DECLSPEC void hmac_sha512_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w4, u32x *w5, u32x *w6, u32x *w7, u64x *ipad, u64x *opad, u64x *digest)
 {
   digest[0] = ipad[0];
@@ -102,7 +95,7 @@ DECLSPEC void hmac_sha512_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w
   sha512_transform_vector (w0, w1, w2, w3, w4, w5, w6, w7, digest);
 }
 
-KERNEL_FQ void m21700_init (KERN_ATTR_TMPS_HOOKS_ESALT (electrum_tmp_t, electrum_hook_t, electrum_t))
+KERNEL_FQ void m21700_init (KERN_ATTR_TMPS_ESALT (electrum_tmp_t, electrum_t))
 {
   /**
    * base
@@ -199,7 +192,7 @@ KERNEL_FQ void m21700_init (KERN_ATTR_TMPS_HOOKS_ESALT (electrum_tmp_t, electrum
   tmps[gid].out[7] = tmps[gid].dgst[7];
 }
 
-KERNEL_FQ void m21700_loop (KERN_ATTR_TMPS_HOOKS_ESALT (electrum_tmp_t, electrum_hook_t, electrum_t))
+KERNEL_FQ void m21700_loop (KERN_ATTR_TMPS_ESALT (electrum_tmp_t, electrum_t))
 {
   const u64 gid = get_global_id (0);
 
@@ -322,8 +315,12 @@ KERNEL_FQ void m21700_loop (KERN_ATTR_TMPS_HOOKS_ESALT (electrum_tmp_t, electrum
   unpack64v (tmps, out, gid, 7, out[7]);
 }
 
-KERNEL_FQ void m21700_hook23 (KERN_ATTR_TMPS_HOOKS_ESALT (electrum_tmp_t, electrum_hook_t, electrum_t))
+KERNEL_FQ void m21700_comp (KERN_ATTR_TMPS_ESALT (electrum_tmp_t, electrum_t))
 {
+  /**
+   * base
+   */
+
   const u64 gid = get_global_id (0);
 
   if (gid >= gid_max) return;
@@ -339,27 +336,9 @@ KERNEL_FQ void m21700_hook23 (KERN_ATTR_TMPS_HOOKS_ESALT (electrum_tmp_t, electr
   out[6] = tmps[gid].out[6];
   out[7] = tmps[gid].out[7];
 
-  // we need to perform a modulo operation with 512-bit % 256-bit (bignum modulo):
-  // the modulus is the secp256k1 group order
-
   /*
-    the general modulo by shift and substract code (a = a % b):
-
-    x = b;
-
-    t = a >> 1;
-
-    while (x <= t) x <<= 1;
-
-    while (a >= b)
-    {
-      if (a >= x) a -= x;
-
-      x >>= 1;
-    }
-
-    return a; // remainder
-  */
+   * First calculate the modulo of the pbkdf2 hash with SECP256K1_N:
+   */
 
   u32 a[16];
 
@@ -380,302 +359,43 @@ KERNEL_FQ void m21700_hook23 (KERN_ATTR_TMPS_HOOKS_ESALT (electrum_tmp_t, electr
   a[14] = h32_from_64_S (out[7]);
   a[15] = l32_from_64_S (out[7]);
 
-  u32 b[16];
-
-  b[ 0] = 0x00000000;
-  b[ 1] = 0x00000000;
-  b[ 2] = 0x00000000;
-  b[ 3] = 0x00000000;
-  b[ 4] = 0x00000000;
-  b[ 5] = 0x00000000;
-  b[ 6] = 0x00000000;
-  b[ 7] = 0x00000000;
-  b[ 8] = 0xffffffff;
-  b[ 9] = 0xffffffff;
-  b[10] = 0xffffffff;
-  b[11] = 0xfffffffe;
-  b[12] = 0xbaaedce6;
-  b[13] = 0xaf48a03b;
-  b[14] = 0xbfd25e8c;
-  b[15] = 0xd0364141;
+  mod_512 (a);
 
-  /*
-   * Start:
-   */
+  // copy the last 256 bit (32 bytes) of modulo (a):
 
-  // x = b (but with a fast "shift" trick to avoid the while loop)
-
-  u32 x[16];
-
-  x[ 0] = b[ 8]; // this is a trick: we just put the group order's most significant bit all the
-  x[ 1] = b[ 9]; // way to the top to avoid doing the initial: while (x <= t) x <<= 1
-  x[ 2] = b[10];
-  x[ 3] = b[11];
-  x[ 4] = b[12];
-  x[ 5] = b[13];
-  x[ 6] = b[14];
-  x[ 7] = b[15];
-  x[ 8] = 0x00000000;
-  x[ 9] = 0x00000000;
-  x[10] = 0x00000000;
-  x[11] = 0x00000000;
-  x[12] = 0x00000000;
-  x[13] = 0x00000000;
-  x[14] = 0x00000000;
-  x[15] = 0x00000000;
-
-  // a >= b
-
-  while (a[0] >= b[0])
-  {
-    const u32 l1 = (a[ 0]  < b[ 0]) <<  0
-                 | (a[ 1]  < b[ 1]) <<  1
-                 | (a[ 2]  < b[ 2]) <<  2
-                 | (a[ 3]  < b[ 3]) <<  3
-                 | (a[ 4]  < b[ 4]) <<  4
-                 | (a[ 5]  < b[ 5]) <<  5
-                 | (a[ 6]  < b[ 6]) <<  6
-                 | (a[ 7]  < b[ 7]) <<  7
-                 | (a[ 8]  < b[ 8]) <<  8
-                 | (a[ 9]  < b[ 9]) <<  9
-                 | (a[10]  < b[10]) << 10
-                 | (a[11]  < b[11]) << 11
-                 | (a[12]  < b[12]) << 12
-                 | (a[13]  < b[13]) << 13
-                 | (a[14]  < b[14]) << 14
-                 | (a[15]  < b[15]) << 15;
-
-    const u32 e1 = (a[ 0] == b[ 0]) <<  0
-                 | (a[ 1] == b[ 1]) <<  1
-                 | (a[ 2] == b[ 2]) <<  2
-                 | (a[ 3] == b[ 3]) <<  3
-                 | (a[ 4] == b[ 4]) <<  4
-                 | (a[ 5] == b[ 5]) <<  5
-                 | (a[ 6] == b[ 6]) <<  6
-                 | (a[ 7] == b[ 7]) <<  7
-                 | (a[ 8] == b[ 8]) <<  8
-                 | (a[ 9] == b[ 9]) <<  9
-                 | (a[10] == b[10]) << 10
-                 | (a[11] == b[11]) << 11
-                 | (a[12] == b[12]) << 12
-                 | (a[13] == b[13]) << 13
-                 | (a[14] == b[14]) << 14
-                 | (a[15] == b[15]) << 15;
-
-    if (l1)
-    {
-      if (l1 & 0x0001)                              break;
-      if (l1 & 0x0002) if ((e1 & 0x0001) == 0x0001) break;
-      if (l1 & 0x0004) if ((e1 & 0x0003) == 0x0003) break;
-      if (l1 & 0x0008) if ((e1 & 0x0007) == 0x0007) break;
-      if (l1 & 0x0010) if ((e1 & 0x000f) == 0x000f) break;
-      if (l1 & 0x0020) if ((e1 & 0x001f) == 0x001f) break;
-      if (l1 & 0x0040) if ((e1 & 0x003f) == 0x003f) break;
-      if (l1 & 0x0080) if ((e1 & 0x007f) == 0x007f) break;
-      if (l1 & 0x0100) if ((e1 & 0x00ff) == 0x00ff) break;
-      if (l1 & 0x0200) if ((e1 & 0x01ff) == 0x01ff) break;
-      if (l1 & 0x0400) if ((e1 & 0x03ff) == 0x03ff) break;
-      if (l1 & 0x0800) if ((e1 & 0x07ff) == 0x07ff) break;
-      if (l1 & 0x1000) if ((e1 & 0x0fff) == 0x0fff) break;
-      if (l1 & 0x2000) if ((e1 & 0x1fff) == 0x1fff) break;
-      if (l1 & 0x4000) if ((e1 & 0x3fff) == 0x3fff) break;
-      if (l1 & 0x8000) if ((e1 & 0x7fff) == 0x7fff) break;
-    }
-
-    // r = x (copy it to have the original values for the subtraction)
-
-    u32 r[16];
-
-    r[ 0] = x[ 0];
-    r[ 1] = x[ 1];
-    r[ 2] = x[ 2];
-    r[ 3] = x[ 3];
-    r[ 4] = x[ 4];
-    r[ 5] = x[ 5];
-    r[ 6] = x[ 6];
-    r[ 7] = x[ 7];
-    r[ 8] = x[ 8];
-    r[ 9] = x[ 9];
-    r[10] = x[10];
-    r[11] = x[11];
-    r[12] = x[12];
-    r[13] = x[13];
-    r[14] = x[14];
-    r[15] = x[15];
-
-    // x >>= 1
-
-    x[15] = x[15] >> 1 | (x[14] & 1) << 31;
-    x[14] = x[14] >> 1 | (x[13] & 1) << 31;
-    x[13] = x[13] >> 1 | (x[12] & 1) << 31;
-    x[12] = x[12] >> 1 | (x[11] & 1) << 31;
-    x[11] = x[11] >> 1 | (x[10] & 1) << 31;
-    x[10] = x[10] >> 1 | (x[ 9] & 1) << 31;
-    x[ 9] = x[ 9] >> 1 | (x[ 8] & 1) << 31;
-    x[ 8] = x[ 8] >> 1 | (x[ 7] & 1) << 31;
-    x[ 7] = x[ 7] >> 1 | (x[ 6] & 1) << 31;
-    x[ 6] = x[ 6] >> 1 | (x[ 5] & 1) << 31;
-    x[ 5] = x[ 5] >> 1 | (x[ 4] & 1) << 31;
-    x[ 4] = x[ 4] >> 1 | (x[ 3] & 1) << 31;
-    x[ 3] = x[ 3] >> 1 | (x[ 2] & 1) << 31;
-    x[ 2] = x[ 2] >> 1 | (x[ 1] & 1) << 31;
-    x[ 1] = x[ 1] >> 1 | (x[ 0] & 1) << 31;
-    x[ 0] = x[ 0] >> 1;
-
-    // if (a >= r) a -= r;
-
-    const u32 l2 = (a[ 0]  < r[ 0]) <<  0
-                 | (a[ 1]  < r[ 1]) <<  1
-                 | (a[ 2]  < r[ 2]) <<  2
-                 | (a[ 3]  < r[ 3]) <<  3
-                 | (a[ 4]  < r[ 4]) <<  4
-                 | (a[ 5]  < r[ 5]) <<  5
-                 | (a[ 6]  < r[ 6]) <<  6
-                 | (a[ 7]  < r[ 7]) <<  7
-                 | (a[ 8]  < r[ 8]) <<  8
-                 | (a[ 9]  < r[ 9]) <<  9
-                 | (a[10]  < r[10]) << 10
-                 | (a[11]  < r[11]) << 11
-                 | (a[12]  < r[12]) << 12
-                 | (a[13]  < r[13]) << 13
-                 | (a[14]  < r[14]) << 14
-                 | (a[15]  < r[15]) << 15;
-
-    const u32 e2 = (a[ 0] == r[ 0]) <<  0
-                 | (a[ 1] == r[ 1]) <<  1
-                 | (a[ 2] == r[ 2]) <<  2
-                 | (a[ 3] == r[ 3]) <<  3
-                 | (a[ 4] == r[ 4]) <<  4
-                 | (a[ 5] == r[ 5]) <<  5
-                 | (a[ 6] == r[ 6]) <<  6
-                 | (a[ 7] == r[ 7]) <<  7
-                 | (a[ 8] == r[ 8]) <<  8
-                 | (a[ 9] == r[ 9]) <<  9
-                 | (a[10] == r[10]) << 10
-                 | (a[11] == r[11]) << 11
-                 | (a[12] == r[12]) << 12
-                 | (a[13] == r[13]) << 13
-                 | (a[14] == r[14]) << 14
-                 | (a[15] == r[15]) << 15;
-
-    if (l2)
-    {
-      if (l2 & 0x0001)                              continue;
-      if (l2 & 0x0002) if ((e2 & 0x0001) == 0x0001) continue;
-      if (l2 & 0x0004) if ((e2 & 0x0003) == 0x0003) continue;
-      if (l2 & 0x0008) if ((e2 & 0x0007) == 0x0007) continue;
-      if (l2 & 0x0010) if ((e2 & 0x000f) == 0x000f) continue;
-      if (l2 & 0x0020) if ((e2 & 0x001f) == 0x001f) continue;
-      if (l2 & 0x0040) if ((e2 & 0x003f) == 0x003f) continue;
-      if (l2 & 0x0080) if ((e2 & 0x007f) == 0x007f) continue;
-      if (l2 & 0x0100) if ((e2 & 0x00ff) == 0x00ff) continue;
-      if (l2 & 0x0200) if ((e2 & 0x01ff) == 0x01ff) continue;
-      if (l2 & 0x0400) if ((e2 & 0x03ff) == 0x03ff) continue;
-      if (l2 & 0x0800) if ((e2 & 0x07ff) == 0x07ff) continue;
-      if (l2 & 0x1000) if ((e2 & 0x0fff) == 0x0fff) continue;
-      if (l2 & 0x2000) if ((e2 & 0x1fff) == 0x1fff) continue;
-      if (l2 & 0x4000) if ((e2 & 0x3fff) == 0x3fff) continue;
-      if (l2 & 0x8000) if ((e2 & 0x7fff) == 0x7fff) continue;
-    }
-
-    // substract (a -= r):
-
-    r[ 0] = a[ 0] - r[ 0];
-    r[ 1] = a[ 1] - r[ 1];
-    r[ 2] = a[ 2] - r[ 2];
-    r[ 3] = a[ 3] - r[ 3];
-    r[ 4] = a[ 4] - r[ 4];
-    r[ 5] = a[ 5] - r[ 5];
-    r[ 6] = a[ 6] - r[ 6];
-    r[ 7] = a[ 7] - r[ 7];
-    r[ 8] = a[ 8] - r[ 8];
-    r[ 9] = a[ 9] - r[ 9];
-    r[10] = a[10] - r[10];
-    r[11] = a[11] - r[11];
-    r[12] = a[12] - r[12];
-    r[13] = a[13] - r[13];
-    r[14] = a[14] - r[14];
-    r[15] = a[15] - r[15];
-
-    // take care of the "borrow" (we can't do it the other way around 15...1 because r[x] is changed!)
-
-    if (r[ 1] > a[ 1]) r[ 0]--;
-    if (r[ 2] > a[ 2]) r[ 1]--;
-    if (r[ 3] > a[ 3]) r[ 2]--;
-    if (r[ 4] > a[ 4]) r[ 3]--;
-    if (r[ 5] > a[ 5]) r[ 4]--;
-    if (r[ 6] > a[ 6]) r[ 5]--;
-    if (r[ 7] > a[ 7]) r[ 6]--;
-    if (r[ 8] > a[ 8]) r[ 7]--;
-    if (r[ 9] > a[ 9]) r[ 8]--;
-    if (r[10] > a[10]) r[ 9]--;
-    if (r[11] > a[11]) r[10]--;
-    if (r[12] > a[12]) r[11]--;
-    if (r[13] > a[13]) r[12]--;
-    if (r[14] > a[14]) r[13]--;
-    if (r[15] > a[15]) r[14]--;
-
-    a[ 0] = r[ 0];
-    a[ 1] = r[ 1];
-    a[ 2] = r[ 2];
-    a[ 3] = r[ 3];
-    a[ 4] = r[ 4];
-    a[ 5] = r[ 5];
-    a[ 6] = r[ 6];
-    a[ 7] = r[ 7];
-    a[ 8] = r[ 8];
-    a[ 9] = r[ 9];
-    a[10] = r[10];
-    a[11] = r[11];
-    a[12] = r[12];
-    a[13] = r[13];
-    a[14] = r[14];
-    a[15] = r[15];
-  }
+  u32 tweak[8];
 
-  /**
-   * copy the last 256 bit (32 bytes) of modulo (a) to the hook buffer
-   */
+  tweak[0] = a[15];
+  tweak[1] = a[14];
+  tweak[2] = a[13];
+  tweak[3] = a[12];
+  tweak[4] = a[11];
+  tweak[5] = a[10];
+  tweak[6] = a[ 9];
+  tweak[7] = a[ 8];
 
-  hooks[gid].ukey[0] = hc_swap32_S (a[ 8]);
-  hooks[gid].ukey[1] = hc_swap32_S (a[ 9]);
-  hooks[gid].ukey[2] = hc_swap32_S (a[10]);
-  hooks[gid].ukey[3] = hc_swap32_S (a[11]);
-  hooks[gid].ukey[4] = hc_swap32_S (a[12]);
-  hooks[gid].ukey[5] = hc_swap32_S (a[13]);
-  hooks[gid].ukey[6] = hc_swap32_S (a[14]);
-  hooks[gid].ukey[7] = hc_swap32_S (a[15]);
-}
 
-KERNEL_FQ void m21700_comp (KERN_ATTR_TMPS_HOOKS_ESALT (electrum_tmp_t, electrum_hook_t, electrum_t))
-{
-  /**
-   * base
+  /*
+   * the main secp256k1 point multiplication by a scalar/tweak:
    */
 
-  const u64 gid = get_global_id (0);
+  GLOBAL_AS secp256k1_t *coords = (GLOBAL_AS secp256k1_t *) &esalt_bufs[digests_offset].coords;
 
-  if (gid >= gid_max) return;
+  u32 pubkey[64] = { 0 }; // for point_mul () we need: 1 + 32 bytes (for sha512 () we need more)
 
-  if (hooks[gid].hook_success == 0) return;
+  point_mul (pubkey, tweak, coords);
 
-  u32 pubkey[64] = { 0 };
 
-  pubkey[0] = hooks[gid].pubkey[0];
-  pubkey[1] = hooks[gid].pubkey[1];
-  pubkey[2] = hooks[gid].pubkey[2];
-  pubkey[3] = hooks[gid].pubkey[3];
-  pubkey[4] = hooks[gid].pubkey[4];
-  pubkey[5] = hooks[gid].pubkey[5];
-  pubkey[6] = hooks[gid].pubkey[6];
-  pubkey[7] = hooks[gid].pubkey[7];
-  pubkey[8] = hooks[gid].pubkey[8];
+  /*
+   * sha512 () of the pubkey:
+   */
 
   sha512_ctx_t sha512_ctx;
 
-  sha512_init        (&sha512_ctx);
-  sha512_update_swap (&sha512_ctx, pubkey, 33); // 33 because of 32 byte curve point + sign
-  sha512_final       (&sha512_ctx);
+  sha512_init   (&sha512_ctx);
+  sha512_update (&sha512_ctx, pubkey, 33); // 33 because of 32 byte curve point + sign
+  sha512_final  (&sha512_ctx);
+
 
   /*
    * sha256-hmac () of the data_buf
diff --git a/OpenCL/m21800-pure.cl b/OpenCL/m21800-pure.cl
index 3658721e2..ccdf822aa 100644
--- a/OpenCL/m21800-pure.cl
+++ b/OpenCL/m21800-pure.cl
@@ -12,8 +12,19 @@
 #include "inc_common.cl"
 #include "inc_simd.cl"
 #include "inc_hash_sha512.cl"
+#include "inc_ecc_secp256k1.cl"
+#include "inc_cipher_aes.cl"
+#include "inc_zip_inflate.cl"
 #endif
 
+typedef struct electrum
+{
+  secp256k1_t coords; // input that m21800_comp hands to point_mul () together with the tweak (presumably derived from the wallet's ephemeral pubkey - confirm against the host module)
+
+  u32 data_buf[256]; // ciphertext (256 x u32 = 1024 bytes); m21800_comp AES-decrypts only the first AES_LEN (128) bytes of it
+
+} electrum_t;
+
 typedef struct electrum_tmp
 {
   u64  ipad[8];
@@ -24,14 +35,6 @@ typedef struct electrum_tmp
 
 } electrum_tmp_t;
 
-typedef struct
-{
-  u32 ukey[8];
-
-  u32 hook_success;
-
-} electrum_hook_t;
-
 DECLSPEC void hmac_sha512_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w4, u32x *w5, u32x *w6, u32x *w7, u64x *ipad, u64x *opad, u64x *digest)
 {
   digest[0] = ipad[0];
@@ -90,7 +93,7 @@ DECLSPEC void hmac_sha512_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w
   sha512_transform_vector (w0, w1, w2, w3, w4, w5, w6, w7, digest);
 }
 
-KERNEL_FQ void m21800_init (KERN_ATTR_TMPS_HOOKS (electrum_tmp_t, electrum_hook_t))
+KERNEL_FQ void m21800_init (KERN_ATTR_TMPS_ESALT (electrum_tmp_t, electrum_t))
 {
   /**
    * base
@@ -187,7 +190,7 @@ KERNEL_FQ void m21800_init (KERN_ATTR_TMPS_HOOKS (electrum_tmp_t, electrum_hook_
   tmps[gid].out[7] = tmps[gid].dgst[7];
 }
 
-KERNEL_FQ void m21800_loop (KERN_ATTR_TMPS_HOOKS (electrum_tmp_t, electrum_hook_t))
+KERNEL_FQ void m21800_loop (KERN_ATTR_TMPS_ESALT (electrum_tmp_t, electrum_t))
 {
   const u64 gid = get_global_id (0);
 
@@ -310,12 +313,70 @@ KERNEL_FQ void m21800_loop (KERN_ATTR_TMPS_HOOKS (electrum_tmp_t, electrum_hook_
   unpack64v (tmps, out, gid, 7, out[7]);
 }
 
-KERNEL_FQ void m21800_hook23 (KERN_ATTR_TMPS_HOOKS (electrum_tmp_t, electrum_hook_t))
+KERNEL_FQ void m21800_comp (KERN_ATTR_TMPS_ESALT (electrum_tmp_t, electrum_t))
 {
-  const u64 gid = get_global_id (0);
+  const u64 gid = get_global_id  (0);
+  const u64 lid = get_local_id   (0);
+  const u64 lsz = get_local_size (0);
+
+  /**
+   * aes shared
+   */
+
+  #ifdef REAL_SHM
+
+  LOCAL_VK u32 s_td0[256];
+  LOCAL_VK u32 s_td1[256];
+  LOCAL_VK u32 s_td2[256];
+  LOCAL_VK u32 s_td3[256];
+  LOCAL_VK u32 s_td4[256];
+
+  LOCAL_VK u32 s_te0[256];
+  LOCAL_VK u32 s_te1[256];
+  LOCAL_VK u32 s_te2[256];
+  LOCAL_VK u32 s_te3[256];
+  LOCAL_VK u32 s_te4[256];
+
+  for (u32 i = lid; i < 256; i += lsz)
+  {
+    s_td0[i] = td0[i];
+    s_td1[i] = td1[i];
+    s_td2[i] = td2[i];
+    s_td3[i] = td3[i];
+    s_td4[i] = td4[i];
+
+    s_te0[i] = te0[i];
+    s_te1[i] = te1[i];
+    s_te2[i] = te2[i];
+    s_te3[i] = te3[i];
+    s_te4[i] = te4[i];
+  }
+
+  SYNC_THREADS ();
+
+  #else
+
+  CONSTANT_AS u32a *s_td0 = td0;
+  CONSTANT_AS u32a *s_td1 = td1;
+  CONSTANT_AS u32a *s_td2 = td2;
+  CONSTANT_AS u32a *s_td3 = td3;
+  CONSTANT_AS u32a *s_td4 = td4;
+
+  CONSTANT_AS u32a *s_te0 = te0;
+  CONSTANT_AS u32a *s_te1 = te1;
+  CONSTANT_AS u32a *s_te2 = te2;
+  CONSTANT_AS u32a *s_te3 = te3;
+  CONSTANT_AS u32a *s_te4 = te4;
+
+  #endif
 
   if (gid >= gid_max) return;
 
+
+  /*
+   * Start by copying/aligning the data
+   */
+
   u64 out[8];
 
   out[0] = tmps[gid].out[0];
@@ -327,27 +388,9 @@ KERNEL_FQ void m21800_hook23 (KERN_ATTR_TMPS_HOOKS (electrum_tmp_t, electrum_hoo
   out[6] = tmps[gid].out[6];
   out[7] = tmps[gid].out[7];
 
-  // we need to perform a modulo operation with 512-bit % 256-bit (bignum modulo):
-  // the modulus is the secp256k1 group order
-
   /*
-    the general modulo by shift and substract code (a = a % b):
-
-    x = b;
-
-    t = a >> 1;
-
-    while (x <= t) x <<= 1;
-
-    while (a >= b)
-    {
-      if (a >= x) a -= x;
-
-      x >>= 1;
-    }
-
-    return a; // remainder
-  */
+   * First calculate the modulo of the pbkdf2 hash with SECP256K1_N:
+   */
 
   u32 a[16];
 
@@ -368,284 +411,199 @@ KERNEL_FQ void m21800_hook23 (KERN_ATTR_TMPS_HOOKS (electrum_tmp_t, electrum_hoo
   a[14] = h32_from_64_S (out[7]);
   a[15] = l32_from_64_S (out[7]);
 
-  u32 b[16];
-
-  b[ 0] = 0x00000000;
-  b[ 1] = 0x00000000;
-  b[ 2] = 0x00000000;
-  b[ 3] = 0x00000000;
-  b[ 4] = 0x00000000;
-  b[ 5] = 0x00000000;
-  b[ 6] = 0x00000000;
-  b[ 7] = 0x00000000;
-  b[ 8] = 0xffffffff;
-  b[ 9] = 0xffffffff;
-  b[10] = 0xffffffff;
-  b[11] = 0xfffffffe;
-  b[12] = 0xbaaedce6;
-  b[13] = 0xaf48a03b;
-  b[14] = 0xbfd25e8c;
-  b[15] = 0xd0364141;
+  mod_512 (a);
+
+  // copy the last 256 bit (32 bytes) of modulo (a):
+
+  u32 tweak[8];
+
+  tweak[0] = a[15];
+  tweak[1] = a[14];
+  tweak[2] = a[13];
+  tweak[3] = a[12];
+  tweak[4] = a[11];
+  tweak[5] = a[10];
+  tweak[6] = a[ 9];
+  tweak[7] = a[ 8];
+
 
   /*
-   * Start:
+   * the main secp256k1 point multiplication by a scalar/tweak:
    */
 
-  // x = b (but with a fast "shift" trick to avoid the while loop)
-
-  u32 x[16];
-
-  x[ 0] = b[ 8]; // this is a trick: we just put the group order's most significant bit all the
-  x[ 1] = b[ 9]; // way to the top to avoid doing the initial: while (x <= t) x <<= 1
-  x[ 2] = b[10];
-  x[ 3] = b[11];
-  x[ 4] = b[12];
-  x[ 5] = b[13];
-  x[ 6] = b[14];
-  x[ 7] = b[15];
-  x[ 8] = 0x00000000;
-  x[ 9] = 0x00000000;
-  x[10] = 0x00000000;
-  x[11] = 0x00000000;
-  x[12] = 0x00000000;
-  x[13] = 0x00000000;
-  x[14] = 0x00000000;
-  x[15] = 0x00000000;
-
-  // a >= b
-
-  while (a[0] >= b[0])
-  {
-    const u32 l1 = (a[ 0]  < b[ 0]) <<  0
-                 | (a[ 1]  < b[ 1]) <<  1
-                 | (a[ 2]  < b[ 2]) <<  2
-                 | (a[ 3]  < b[ 3]) <<  3
-                 | (a[ 4]  < b[ 4]) <<  4
-                 | (a[ 5]  < b[ 5]) <<  5
-                 | (a[ 6]  < b[ 6]) <<  6
-                 | (a[ 7]  < b[ 7]) <<  7
-                 | (a[ 8]  < b[ 8]) <<  8
-                 | (a[ 9]  < b[ 9]) <<  9
-                 | (a[10]  < b[10]) << 10
-                 | (a[11]  < b[11]) << 11
-                 | (a[12]  < b[12]) << 12
-                 | (a[13]  < b[13]) << 13
-                 | (a[14]  < b[14]) << 14
-                 | (a[15]  < b[15]) << 15;
-
-    const u32 e1 = (a[ 0] == b[ 0]) <<  0
-                 | (a[ 1] == b[ 1]) <<  1
-                 | (a[ 2] == b[ 2]) <<  2
-                 | (a[ 3] == b[ 3]) <<  3
-                 | (a[ 4] == b[ 4]) <<  4
-                 | (a[ 5] == b[ 5]) <<  5
-                 | (a[ 6] == b[ 6]) <<  6
-                 | (a[ 7] == b[ 7]) <<  7
-                 | (a[ 8] == b[ 8]) <<  8
-                 | (a[ 9] == b[ 9]) <<  9
-                 | (a[10] == b[10]) << 10
-                 | (a[11] == b[11]) << 11
-                 | (a[12] == b[12]) << 12
-                 | (a[13] == b[13]) << 13
-                 | (a[14] == b[14]) << 14
-                 | (a[15] == b[15]) << 15;
-
-    if (l1)
-    {
-      if (l1 & 0x0001)                              break;
-      if (l1 & 0x0002) if ((e1 & 0x0001) == 0x0001) break;
-      if (l1 & 0x0004) if ((e1 & 0x0003) == 0x0003) break;
-      if (l1 & 0x0008) if ((e1 & 0x0007) == 0x0007) break;
-      if (l1 & 0x0010) if ((e1 & 0x000f) == 0x000f) break;
-      if (l1 & 0x0020) if ((e1 & 0x001f) == 0x001f) break;
-      if (l1 & 0x0040) if ((e1 & 0x003f) == 0x003f) break;
-      if (l1 & 0x0080) if ((e1 & 0x007f) == 0x007f) break;
-      if (l1 & 0x0100) if ((e1 & 0x00ff) == 0x00ff) break;
-      if (l1 & 0x0200) if ((e1 & 0x01ff) == 0x01ff) break;
-      if (l1 & 0x0400) if ((e1 & 0x03ff) == 0x03ff) break;
-      if (l1 & 0x0800) if ((e1 & 0x07ff) == 0x07ff) break;
-      if (l1 & 0x1000) if ((e1 & 0x0fff) == 0x0fff) break;
-      if (l1 & 0x2000) if ((e1 & 0x1fff) == 0x1fff) break;
-      if (l1 & 0x4000) if ((e1 & 0x3fff) == 0x3fff) break;
-      if (l1 & 0x8000) if ((e1 & 0x7fff) == 0x7fff) break;
-    }
+  GLOBAL_AS secp256k1_t *coords = (GLOBAL_AS secp256k1_t *) &esalt_bufs[digests_offset].coords;
 
-    // r = x (copy it to have the original values for the subtraction)
-
-    u32 r[16];
-
-    r[ 0] = x[ 0];
-    r[ 1] = x[ 1];
-    r[ 2] = x[ 2];
-    r[ 3] = x[ 3];
-    r[ 4] = x[ 4];
-    r[ 5] = x[ 5];
-    r[ 6] = x[ 6];
-    r[ 7] = x[ 7];
-    r[ 8] = x[ 8];
-    r[ 9] = x[ 9];
-    r[10] = x[10];
-    r[11] = x[11];
-    r[12] = x[12];
-    r[13] = x[13];
-    r[14] = x[14];
-    r[15] = x[15];
-
-    // x >>= 1
-
-    x[15] = x[15] >> 1 | (x[14] & 1) << 31;
-    x[14] = x[14] >> 1 | (x[13] & 1) << 31;
-    x[13] = x[13] >> 1 | (x[12] & 1) << 31;
-    x[12] = x[12] >> 1 | (x[11] & 1) << 31;
-    x[11] = x[11] >> 1 | (x[10] & 1) << 31;
-    x[10] = x[10] >> 1 | (x[ 9] & 1) << 31;
-    x[ 9] = x[ 9] >> 1 | (x[ 8] & 1) << 31;
-    x[ 8] = x[ 8] >> 1 | (x[ 7] & 1) << 31;
-    x[ 7] = x[ 7] >> 1 | (x[ 6] & 1) << 31;
-    x[ 6] = x[ 6] >> 1 | (x[ 5] & 1) << 31;
-    x[ 5] = x[ 5] >> 1 | (x[ 4] & 1) << 31;
-    x[ 4] = x[ 4] >> 1 | (x[ 3] & 1) << 31;
-    x[ 3] = x[ 3] >> 1 | (x[ 2] & 1) << 31;
-    x[ 2] = x[ 2] >> 1 | (x[ 1] & 1) << 31;
-    x[ 1] = x[ 1] >> 1 | (x[ 0] & 1) << 31;
-    x[ 0] = x[ 0] >> 1;
-
-    // if (a >= r) a -= r;
-
-    const u32 l2 = (a[ 0]  < r[ 0]) <<  0
-                 | (a[ 1]  < r[ 1]) <<  1
-                 | (a[ 2]  < r[ 2]) <<  2
-                 | (a[ 3]  < r[ 3]) <<  3
-                 | (a[ 4]  < r[ 4]) <<  4
-                 | (a[ 5]  < r[ 5]) <<  5
-                 | (a[ 6]  < r[ 6]) <<  6
-                 | (a[ 7]  < r[ 7]) <<  7
-                 | (a[ 8]  < r[ 8]) <<  8
-                 | (a[ 9]  < r[ 9]) <<  9
-                 | (a[10]  < r[10]) << 10
-                 | (a[11]  < r[11]) << 11
-                 | (a[12]  < r[12]) << 12
-                 | (a[13]  < r[13]) << 13
-                 | (a[14]  < r[14]) << 14
-                 | (a[15]  < r[15]) << 15;
-
-    const u32 e2 = (a[ 0] == r[ 0]) <<  0
-                 | (a[ 1] == r[ 1]) <<  1
-                 | (a[ 2] == r[ 2]) <<  2
-                 | (a[ 3] == r[ 3]) <<  3
-                 | (a[ 4] == r[ 4]) <<  4
-                 | (a[ 5] == r[ 5]) <<  5
-                 | (a[ 6] == r[ 6]) <<  6
-                 | (a[ 7] == r[ 7]) <<  7
-                 | (a[ 8] == r[ 8]) <<  8
-                 | (a[ 9] == r[ 9]) <<  9
-                 | (a[10] == r[10]) << 10
-                 | (a[11] == r[11]) << 11
-                 | (a[12] == r[12]) << 12
-                 | (a[13] == r[13]) << 13
-                 | (a[14] == r[14]) << 14
-                 | (a[15] == r[15]) << 15;
-
-    if (l2)
-    {
-      if (l2 & 0x0001)                              continue;
-      if (l2 & 0x0002) if ((e2 & 0x0001) == 0x0001) continue;
-      if (l2 & 0x0004) if ((e2 & 0x0003) == 0x0003) continue;
-      if (l2 & 0x0008) if ((e2 & 0x0007) == 0x0007) continue;
-      if (l2 & 0x0010) if ((e2 & 0x000f) == 0x000f) continue;
-      if (l2 & 0x0020) if ((e2 & 0x001f) == 0x001f) continue;
-      if (l2 & 0x0040) if ((e2 & 0x003f) == 0x003f) continue;
-      if (l2 & 0x0080) if ((e2 & 0x007f) == 0x007f) continue;
-      if (l2 & 0x0100) if ((e2 & 0x00ff) == 0x00ff) continue;
-      if (l2 & 0x0200) if ((e2 & 0x01ff) == 0x01ff) continue;
-      if (l2 & 0x0400) if ((e2 & 0x03ff) == 0x03ff) continue;
-      if (l2 & 0x0800) if ((e2 & 0x07ff) == 0x07ff) continue;
-      if (l2 & 0x1000) if ((e2 & 0x0fff) == 0x0fff) continue;
-      if (l2 & 0x2000) if ((e2 & 0x1fff) == 0x1fff) continue;
-      if (l2 & 0x4000) if ((e2 & 0x3fff) == 0x3fff) continue;
-      if (l2 & 0x8000) if ((e2 & 0x7fff) == 0x7fff) continue;
-    }
+  u32 pubkey[64] = { 0 }; // for point_mul () we need: 1 + 32 bytes (for sha512 () we need more)
 
-    // substract (a -= r):
-
-    r[ 0] = a[ 0] - r[ 0];
-    r[ 1] = a[ 1] - r[ 1];
-    r[ 2] = a[ 2] - r[ 2];
-    r[ 3] = a[ 3] - r[ 3];
-    r[ 4] = a[ 4] - r[ 4];
-    r[ 5] = a[ 5] - r[ 5];
-    r[ 6] = a[ 6] - r[ 6];
-    r[ 7] = a[ 7] - r[ 7];
-    r[ 8] = a[ 8] - r[ 8];
-    r[ 9] = a[ 9] - r[ 9];
-    r[10] = a[10] - r[10];
-    r[11] = a[11] - r[11];
-    r[12] = a[12] - r[12];
-    r[13] = a[13] - r[13];
-    r[14] = a[14] - r[14];
-    r[15] = a[15] - r[15];
-
-    // take care of the "borrow" (we can't do it the other way around 15...1 because r[x] is changed!)
-
-    if (r[ 1] > a[ 1]) r[ 0]--;
-    if (r[ 2] > a[ 2]) r[ 1]--;
-    if (r[ 3] > a[ 3]) r[ 2]--;
-    if (r[ 4] > a[ 4]) r[ 3]--;
-    if (r[ 5] > a[ 5]) r[ 4]--;
-    if (r[ 6] > a[ 6]) r[ 5]--;
-    if (r[ 7] > a[ 7]) r[ 6]--;
-    if (r[ 8] > a[ 8]) r[ 7]--;
-    if (r[ 9] > a[ 9]) r[ 8]--;
-    if (r[10] > a[10]) r[ 9]--;
-    if (r[11] > a[11]) r[10]--;
-    if (r[12] > a[12]) r[11]--;
-    if (r[13] > a[13]) r[12]--;
-    if (r[14] > a[14]) r[13]--;
-    if (r[15] > a[15]) r[14]--;
-
-    a[ 0] = r[ 0];
-    a[ 1] = r[ 1];
-    a[ 2] = r[ 2];
-    a[ 3] = r[ 3];
-    a[ 4] = r[ 4];
-    a[ 5] = r[ 5];
-    a[ 6] = r[ 6];
-    a[ 7] = r[ 7];
-    a[ 8] = r[ 8];
-    a[ 9] = r[ 9];
-    a[10] = r[10];
-    a[11] = r[11];
-    a[12] = r[12];
-    a[13] = r[13];
-    a[14] = r[14];
-    a[15] = r[15];
-  }
+  point_mul (pubkey, tweak, coords);
 
-  /**
-   * copy the last 256 bit (32 bytes) of modulo (a) to the hook buffer
+
+  /*
+   * sha512 () of the pubkey:
    */
 
-  hooks[gid].ukey[0] = hc_swap32_S (a[ 8]);
-  hooks[gid].ukey[1] = hc_swap32_S (a[ 9]);
-  hooks[gid].ukey[2] = hc_swap32_S (a[10]);
-  hooks[gid].ukey[3] = hc_swap32_S (a[11]);
-  hooks[gid].ukey[4] = hc_swap32_S (a[12]);
-  hooks[gid].ukey[5] = hc_swap32_S (a[13]);
-  hooks[gid].ukey[6] = hc_swap32_S (a[14]);
-  hooks[gid].ukey[7] = hc_swap32_S (a[15]);
-}
+  sha512_ctx_t sha512_ctx;
 
-KERNEL_FQ void m21800_comp (KERN_ATTR_TMPS_HOOKS (electrum_tmp_t, electrum_hook_t))
-{
-  /**
-   * base
+  sha512_init   (&sha512_ctx);
+  sha512_update (&sha512_ctx, pubkey, 33); // 33 because of 32 byte curve point + sign
+  sha512_final  (&sha512_ctx);
+
+  // ... now we have the result in sha512_ctx.h[0]...sha512_ctx.h[7]
+
+  u32 iv[4];
+
+  iv[0] = h32_from_64_S (sha512_ctx.h[0]);
+  iv[1] = l32_from_64_S (sha512_ctx.h[0]);
+  iv[2] = h32_from_64_S (sha512_ctx.h[1]);
+  iv[3] = l32_from_64_S (sha512_ctx.h[1]);
+
+  iv[0] = hc_swap32_S (iv[0]);
+  iv[1] = hc_swap32_S (iv[1]);
+  iv[2] = hc_swap32_S (iv[2]);
+  iv[3] = hc_swap32_S (iv[3]);
+
+  u32 key[4];
+
+  key[0] = h32_from_64_S (sha512_ctx.h[2]);
+  key[1] = l32_from_64_S (sha512_ctx.h[2]);
+  key[2] = h32_from_64_S (sha512_ctx.h[3]);
+  key[3] = l32_from_64_S (sha512_ctx.h[3]);
+
+  key[0] = hc_swap32_S (key[0]);
+  key[1] = hc_swap32_S (key[1]);
+  key[2] = hc_swap32_S (key[2]);
+  key[3] = hc_swap32_S (key[3]);
+
+
+  /*
+   * AES decrypt the data_buf
    */
 
-  const u64 gid = get_global_id (0);
+  // init AES
 
-  if (gid >= gid_max) return;
+  #define KEYLEN 44
+
+  u32 ks[KEYLEN];
+
+  aes128_set_decrypt_key (ks, key, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3);
+
+  // AES_LEN 1024 would cover the whole data_buf, but in our tests
+  // decrypting only the first 128 input bytes was already sufficient
+  #define AES_LEN       128
+  #define AES_LEN_DIV_4  32
+
+  u32 buf_full[AES_LEN_DIV_4];
+
+  // we need to run it at least once:
+
+  GLOBAL_AS u32 *data_buf = (GLOBAL_AS u32 *) esalt_bufs[digests_offset].data_buf;
+
+  u32 data[4];
+
+  data[0] = data_buf[0];
+  data[1] = data_buf[1];
+  data[2] = data_buf[2];
+  data[3] = data_buf[3];
+
+  u32 buf[4];
+
+  aes128_decrypt (ks, data, buf, s_td0, s_td1, s_td2, s_td3, s_td4);
+
+  buf[0] ^= iv[0];
+
+  // early reject
+
+  if ((buf[0] & 0x0007ffff) != 0x00059c78) return;
+
+  buf[1] ^= iv[1];
+  buf[2] ^= iv[2];
+  buf[3] ^= iv[3];
+
+  buf_full[0] = buf[0];
+  buf_full[1] = buf[1];
+  buf_full[2] = buf[2];
+  buf_full[3] = buf[3];
+
+  iv[0] = data[0];
+  iv[1] = data[1];
+  iv[2] = data[2];
+  iv[3] = data[3];
+
+  // for AES_LEN > 16 we need to loop
+
+  for (int i = 16, j = 4; i < AES_LEN; i += 16, j += 4)
+  {
+    data[0] = data_buf[j + 0];
+    data[1] = data_buf[j + 1];
+    data[2] = data_buf[j + 2];
+    data[3] = data_buf[j + 3];
+
+    aes128_decrypt (ks, data, buf, s_td0, s_td1, s_td2, s_td3, s_td4);
+
+    buf[0] ^= iv[0];
+    buf[1] ^= iv[1];
+    buf[2] ^= iv[2];
+    buf[3] ^= iv[3];
+
+    iv[0] = data[0];
+    iv[1] = data[1];
+    iv[2] = data[2];
+    iv[3] = data[3];
+
+    buf_full[j + 0] = buf[0];
+    buf_full[j + 1] = buf[1];
+    buf_full[j + 2] = buf[2];
+    buf_full[j + 3] = buf[3];
+  }
+
+
+  /*
+   * zlib inflate/decompress:
+   */
+
+  mz_stream infstream;
+
+  infstream.opaque    = Z_NULL;
+
+  // input:
+
+  infstream.avail_in  = AES_LEN;
+  infstream.next_in   = (u8 *) buf_full;
+
+  // output:
+
+  #define OUT_SIZE 16
+
+  u8 tmp[OUT_SIZE];
+
+  infstream.avail_out = OUT_SIZE;
+  infstream.next_out  = tmp;
+
+
+  // decompress it:
+
+  inflate_state pStream;
+
+  mz_inflateInit2 (&infstream, MAX_WBITS, &pStream);
+
+  const int zlib_ret = inflate (&infstream, Z_NO_FLUSH);
+
+  if ((zlib_ret != MZ_OK) && (zlib_ret != MZ_STREAM_END))
+  {
+    return;
+  }
+
+
+  /*
+   * Verify that the decompressed data starts with either:
+   * - "{\n    \"" or
+   * - "{\r\n    \""
+   */
 
-  if (hooks[gid].hook_success == 1)
+  if (((tmp[0] == 0x7b) && (tmp[1] == 0x0a) && (tmp[2] == 0x20) && (tmp[3] == 0x20) &&
+       (tmp[4] == 0x20) && (tmp[5] == 0x20) && (tmp[6] == 0x22)) ||
+      ((tmp[0] == 0x7b) && (tmp[1] == 0x0d) && (tmp[2] == 0x0a) && (tmp[3] == 0x20) &&
+       (tmp[4] == 0x20) && (tmp[5] == 0x20) && (tmp[6] == 0x20) && (tmp[7] == 0x22)))
   {
     if (atomic_inc (&hashes_shown[digests_offset]) == 0)
     {
diff --git a/docs/credits.txt b/docs/credits.txt
index 71ad760fa..403a6f261 100644
--- a/docs/credits.txt
+++ b/docs/credits.txt
@@ -56,7 +56,7 @@ Other contributors to hashcat
 * LZMA-SDK by Igor Pavlov
 * zlib by Jean-loup Gailly and Mark Adler
 * win-iconv by Yukihiro Nakadaira
-* secp256k1 library by Pieter Wuille
+* micro-ecc by Ken MacKay (used as reference for some secp256k1 operations)
 
 # Furthermore the following persons helped the project:
 
diff --git a/include/emu_inc_ecc_secp256k1.h b/include/emu_inc_ecc_secp256k1.h
new file mode 100644
index 000000000..a411dafd1
--- /dev/null
+++ b/include/emu_inc_ecc_secp256k1.h
@@ -0,0 +1,14 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#ifndef _EMU_INC_ECC_SECP256K1_H
+#define _EMU_INC_ECC_SECP256K1_H
+
+#include "emu_general.h"
+
+#include "inc_vendor.h"
+#include "inc_ecc_secp256k1.h"
+
+#endif // _EMU_INC_ECC_SECP256K1_H
diff --git a/include/ext_secp256k1.h b/include/ext_secp256k1.h
deleted file mode 100644
index 689a75300..000000000
--- a/include/ext_secp256k1.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/**
- * Author......: See docs/credits.txt
- * License.....: MIT
- */
-
-#ifndef _EXT_SECP256K1_H
-
-#include "secp256k1.h"
-
-bool hc_secp256k1_pubkey_parse     (secp256k1_pubkey *pubkey, u8 *buf, size_t length);
-bool hc_secp256k1_pubkey_tweak_mul (secp256k1_pubkey *pubkey, u8 *buf, size_t length);
-
-#endif // _EXT_SECP256K1_H
diff --git a/src/Makefile b/src/Makefile
index 6520f928c..a922a2362 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -10,13 +10,9 @@ PRODUCTION_VERSION      := v5.1.0
 ENABLE_BRAIN            := 1
 USE_SYSTEM_LZMA         := 0
 USE_SYSTEM_ZLIB         := 0
-USE_SYSTEM_LIBSECP256K1 := 0
 USE_SYSTEM_OPENCL       := 0
 USE_SYSTEM_XXHASH       := 0
 
-# NOTE: USE_SYSTEM_LIBSECP256K1 set to 1 can come with a huge performance hit for Electrum 4-5
-# this is due to the public API (secp256k1.h) not exposing all the faster ECC operations we need
-
 ##
 ## Detect Operating System
 ##
@@ -124,12 +120,6 @@ else
 DEPS_ZLIB_PATH          := $(LIBRARY_DEV_ROOT_FOLDER)/
 endif
 
-ifeq ($(USE_SYSTEM_LIBSECP256K1),0)
-DEPS_LIBSECP256K1_PATH  := deps/secp256k1/
-else
-DEPS_LIBSECP256K1_PATH  := $(LIBRARY_DEV_ROOT_FOLDER)/
-endif
-
 ifeq ($(USE_SYSTEM_OPENCL),0)
 DEPS_OPENCL_PATH        := deps/OpenCL-Headers
 else
@@ -195,11 +185,6 @@ CFLAGS_ZLIB             += -Wno-unused-parameter
 CFLAGS_ZLIB             += -DIOAPI_NO_64
 endif
 
-## because LIBSECP256K1 (Electrum 4/5)
-CFLAGS_LIBSECP256K1     += -Wno-unused-parameter
-CFLAGS_LIBSECP256K1     += -Wno-unused-function
-CFLAGS_LIBSECP256K1     += -Wno-nonnull-compare
-
 ifeq ($(DEBUG),0)
 CFLAGS                  += -O2
 ifneq ($(UNAME),Darwin)
@@ -238,24 +223,6 @@ ifeq ($(USE_SYSTEM_ZLIB),1)
 LFLAGS                  += -lz
 endif
 
-# LIBSECP256K1
-
-ifeq ($(USE_SYSTEM_LIBSECP256K1),1)
-LFLAGS                  += -lsecp256k1
-CFLAGS_LIBSECP256K1     += -DWITH_LIBSECP256K1
-
-# NOT working if used only in CFLAGS_LIBSECP256K1 because we need to include secp256k1.h in the module too
-CFLAGS                  += -I$(DEPS_LIBSECP256K1_PATH)
-else
-CFLAGS_LIBSECP256K1     += -I$(DEPS_LIBSECP256K1_PATH)/src/
-
-# files in deps/secp256k1/ include "include/secp256k1.h" so we need the parent folder too
-CFLAGS_LIBSECP256K1     += -I$(DEPS_LIBSECP256K1_PATH)
-
-# NOT working if used only in CFLAGS_LIBSECP256K1 because we need to include secp256k1.h in the module too
-CFLAGS                  += -I$(DEPS_LIBSECP256K1_PATH)/include/
-endif
-
 # OpenCL
 CFLAGS                  += -I$(DEPS_OPENCL_PATH)
 
@@ -333,10 +300,10 @@ endif # MSYS2
 EMU_OBJS_ALL            := emu_general emu_inc_common emu_inc_platform emu_inc_scalar emu_inc_simd
 EMU_OBJS_ALL            += emu_inc_rp emu_inc_rp_optimized
 EMU_OBJS_ALL            += emu_inc_truecrypt_crc32 emu_inc_truecrypt_keyfile emu_inc_truecrypt_xts emu_inc_veracrypt_xts
-EMU_OBJS_ALL            += emu_inc_hash_md4 emu_inc_hash_md5 emu_inc_hash_ripemd160 emu_inc_hash_sha1 emu_inc_hash_sha256 emu_inc_hash_sha384 emu_inc_hash_sha512 emu_inc_hash_streebog256 emu_inc_hash_streebog512
+EMU_OBJS_ALL            += emu_inc_hash_md4 emu_inc_hash_md5 emu_inc_hash_ripemd160 emu_inc_hash_sha1 emu_inc_hash_sha256 emu_inc_hash_sha384 emu_inc_hash_sha512 emu_inc_hash_streebog256 emu_inc_hash_streebog512 emu_inc_ecc_secp256k1
 EMU_OBJS_ALL            += emu_inc_cipher_aes emu_inc_cipher_camellia emu_inc_cipher_des emu_inc_cipher_kuznyechik emu_inc_cipher_serpent emu_inc_cipher_twofish
 
-OBJS_ALL                := affinity autotune backend benchmark bitmap bitops combinator common convert cpt cpu_crc32 debugfile dictstat dispatch dynloader event ext_ADL ext_cuda ext_nvapi ext_nvml ext_nvrtc ext_OpenCL ext_sysfs ext_lzma ext_secp256k1 filehandling folder hashcat hashes hlfmt hwmon induct interface keyboard_layout locking logfile loopback memory monitor mpsp outfile_check outfile pidfile potfile restore rp rp_cpu selftest slow_candidates shared status stdout straight terminal thread timer tuningdb usage user_options wordlist $(EMU_OBJS_ALL)
+OBJS_ALL                := affinity autotune backend benchmark bitmap bitops combinator common convert cpt cpu_crc32 debugfile dictstat dispatch dynloader event ext_ADL ext_cuda ext_nvapi ext_nvml ext_nvrtc ext_OpenCL ext_sysfs ext_lzma filehandling folder hashcat hashes hlfmt hwmon induct interface keyboard_layout locking logfile loopback memory monitor mpsp outfile_check outfile pidfile potfile restore rp rp_cpu selftest slow_candidates shared status stdout straight terminal thread timer tuningdb usage user_options wordlist $(EMU_OBJS_ALL)
 
 ifeq ($(ENABLE_BRAIN),1)
 OBJS_ALL                += brain
@@ -517,9 +484,6 @@ obj/%.NATIVE.o: $(DEPS_ZLIB_PATH)/%.c
 	$(CC) -c $(CFLAGS_NATIVE) $(CFLAGS_ZLIB) $< -o $@ -fpic
 endif
 
-obj/ext_secp256k1.NATIVE.o: src/ext_secp256k1.c
-	$(CC) -c $(CFLAGS_NATIVE) $(CFLAGS_LIBSECP256K1) $< -o $@ -fpic
-
 ifeq ($(USE_SYSTEM_XXHASH),0)
 ifeq ($(ENABLE_BRAIN),1)
 obj/%.NATIVE.o: $(DEPS_XXHASH_PATH)/%.c
@@ -682,12 +646,6 @@ obj/%.WIN.o:   $(DEPS_XXHASH_PATH)/%.c
 endif
 endif
 
-obj/ext_secp256k1.LINUX.o: src/ext_secp256k1.c
-	$(CC_LINUX) $(CFLAGS_CROSS_LINUX) $(CFLAGS_LIBSECP256K1) -c -o $@ $<
-
-obj/ext_secp256k1.WIN.o: src/ext_secp256k1.c
-	$(CC_WIN) $(CFLAGS_CROSS_WIN) $(CFLAGS_LIBSECP256K1) -c -o $@ $<
-
 obj/combined.LINUX.a: $(LINUX_OBJS)
 	$(AR_LINUX) rcs $@ $^
 
diff --git a/src/emu_inc_ecc_secp256k1.c b/src/emu_inc_ecc_secp256k1.c
new file mode 100644
index 000000000..934824312
--- /dev/null
+++ b/src/emu_inc_ecc_secp256k1.c
@@ -0,0 +1,13 @@
+/**
+ * Author......: See docs/credits.txt
+ * License.....: MIT
+ */
+
+#include "types.h"
+#include "common.h"
+#include "emu_general.h"
+
+#include "inc_vendor.h"
+#include "inc_platform.h"
+#include "inc_ecc_secp256k1.cl"
+
diff --git a/src/ext_secp256k1.c b/src/ext_secp256k1.c
deleted file mode 100644
index ad081af54..000000000
--- a/src/ext_secp256k1.c
+++ /dev/null
@@ -1,151 +0,0 @@
-/**
- * Author......: See docs/credits.txt
- * License.....: MIT
- */
-
-#include "types.h"
-#include "common.h"
-
-#include "ext_secp256k1.h"
-
-
-#if !defined (WITH_LIBSECP256K1)
-
-// some macros needed for secp256k1 header and source code includes:
-
-// is this a good 64-bit support check ?
-#if !defined (__LP64__) && !defined (_WIN64) && !defined (__x86_64__)
-
-#define USE_SCALAR_8X32
-#define USE_FIELD_10X26
-
-#else
-
-#define HAVE___INT128
-#define USE_ASM_X86_64
-// doesn't change speed much: #define USE_ECMULT_STATIC_PRECOMPUTATION
-
-#define USE_SCALAR_4X64
-#define USE_FIELD_5X52
-
-#endif
-
-#define USE_SCALAR_INV_BUILTIN
-#define USE_FIELD_INV_BUILTIN
-
-#define ECMULT_WINDOW_SIZE   15
-#define ECMULT_GEN_PREC_BITS  4
-
-#define USE_NUM_NONE
-
-#include "secp256k1.c"
-
-#endif
-
-bool hc_secp256k1_pubkey_parse (secp256k1_pubkey *pubkey, u8 *buf, size_t length)
-{
-  secp256k1_context *t_ctx = secp256k1_context_create (SECP256K1_CONTEXT_NONE);
-
-  if (secp256k1_ec_pubkey_parse (t_ctx, pubkey, buf, length) == 0)
-  {
-    secp256k1_context_destroy (t_ctx);
-
-    return false;
-  }
-
-  secp256k1_context_destroy (t_ctx);
-
-  return true;
-}
-
-bool hc_secp256k1_pubkey_tweak_mul (secp256k1_pubkey *pubkey, u8 *buf, size_t length)
-{
-  #if !defined (WITH_LIBSECP256K1)
-
-  secp256k1_context *sctx = secp256k1_context_create (SECP256K1_CONTEXT_NONE);
-
-  secp256k1_gej res;
-  secp256k1_ge  pt;
-
-  // load the public key and 32 byte scalar:
-
-  secp256k1_pubkey_load (sctx, &pt, pubkey);
-
-  int overflow = 0;
-
-  secp256k1_scalar s;
-
-  secp256k1_scalar_set_b32 (&s, buf, &overflow);
-
-  if (overflow != 0)
-  {
-    secp256k1_scalar_clear (&s);
-
-    secp256k1_context_destroy (sctx);
-
-    return false;
-  }
-
-  if (secp256k1_scalar_is_zero (&s))
-  {
-    secp256k1_scalar_clear (&s);
-
-    secp256k1_context_destroy (sctx);
-
-    return false;
-  }
-
-
-  // main multiply operation:
-
-  const size_t scalar_size = (length - 1) * 8;
-
-  secp256k1_ecmult_const (&res, &pt, &s, scalar_size);
-  secp256k1_ge_set_gej   (&pt, &res);
-  secp256k1_fe_normalize (&pt.x);
-  secp256k1_fe_normalize (&pt.y);
-
-
-  // output:
-
-  buf[0] = 0x02 | secp256k1_fe_is_odd (&pt.y);
-
-  secp256k1_fe_get_b32 (buf + 1, &pt.x);
-
-
-  // cleanup:
-
-  secp256k1_scalar_clear (&s);
-
-  secp256k1_context_destroy (sctx);
-
-  #else
-
-  // ATTENTION: this way to multiply was much slower in our tests
-
-  secp256k1_context *sctx = secp256k1_context_create (SECP256K1_CONTEXT_VERIFY);
-
-
-  // main multiply operation:
-
-  if (secp256k1_ec_pubkey_tweak_mul (sctx, pubkey, buf) == 0)
-  {
-    secp256k1_context_destroy (sctx);
-
-    return false;
-  }
-
-
-  // output:
-
-  secp256k1_ec_pubkey_serialize (sctx, buf, &length, pubkey, SECP256K1_EC_COMPRESSED);
-
-
-  // cleanup:
-
-  secp256k1_context_destroy (sctx);
-
-  #endif
-
-  return true;
-}
diff --git a/src/modules/module_21700.c b/src/modules/module_21700.c
index c6fa73ecd..49155aabe 100644
--- a/src/modules/module_21700.c
+++ b/src/modules/module_21700.c
@@ -10,7 +10,7 @@
 #include "convert.h"
 #include "shared.h"
 #include "memory.h"
-#include "ext_secp256k1.h"
+#include "emu_inc_ecc_secp256k1.h"
 
 static const u32   ATTACK_EXEC    = ATTACK_EXEC_OUTSIDE_KERNEL;
 static const u32   DGST_POS0      = 0;
@@ -24,8 +24,7 @@ static const u64   KERN_TYPE      = 21700;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
                                   | OPTI_TYPE_USES_BITS_64
                                   | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
-static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE
-                                  | OPTS_TYPE_HOOK23;
+static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE;
 static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
 static const char *ST_PASS        = "hashcat";
 static const char *ST_HASH        = "$electrum$4*03eae309d8bda5dcbddaae8145469193152763894b7260a6c4ba181b3ac2ed5653*8c594086a64dc87a9c1f8a69f646e31e8d3182c3c722def4427aa20684776ac26092c6f60bf2762e27adfa93fe1e952dcb8d6362224b9a371953aa3a2edb596ce5eb4c0879c4353f2cc515ec6c9e7a6defa26c5df346d18a62e9d40fcc606bc8c34322bf2212f77770a683788db0baf4cb43595c2a27fe5ff8bdcb1fd915bcd725149d8ee8f14c71635fecb04da5dde97584f4581ceb7d907dceed80ae5daa8352dda20b25fd6001e99a96b7cf839a36cd3f5656304e6998c18e03dd2fb720cb41386c52910c9cb83272c3d50f3a6ff362ab8389b0c21c75133c971df0a75b331796371b060b32fe1673f4a041d7ae08bbdeffb45d706eaf65f99573c07972701c97766b4d7a8a03bba0f885eb3845dfd9152286e1de1f93e25ce04c54712509166dda80a84c2d34652f68e6c01e662f8b1cc7c15103a4502c29332a4fdbdda470c875809e15aab3f2fcb061ee96992ad7e8ab9da88203e35f47d6e88b07a13b0e70ef76de3be20dc06facbddc1e47206b16b44573f57396265116b4d243e77d1c98bc2b28aa3ec0f8d959764a54ecdd03d8360ff2823577fe2183e618aac15b30c1d20986841e3d83c0bfabcedb7c27ddc436eb7113db927e0beae7522b04566631a090b214660152a4f4a90e19356e66ee7309a0671b2e7bfde82667538d193fc7e397442052c6c611b6bf0a04f629a1dc7fa9eb44bfad1bfc6a0bce9f0564c3b483737e447720b7fd038c9a961a25e9594b76bf8c8071c83fcacd689c7469f698ee4aee4d4f626a73e21ce4967e705e4d83e1145b4260330367d8341c84723a1b02567ffbab26aac3afd1079887b4391f05d09780fc65f8b4f68cd51391c06593919d7eafd0775f83045b8f5c2e59cef902ff500654ea29b7623c7594ab2cc0e05ffe3f10abc46c9c5dac824673c307dcbff5bc5f3774141ff99f6a34ec4dd8a58d154a1c72636a2422b8fafdef399dec350d2b91947448582d52291f2261d264d29399ae3c92dc61769a49224af9e7c98d74190f93eb49a44db7587c1a2afb5e1a4bec5cdeb8ad2aac9728d5ae95600c52e9f063c11cdb32b7c1d8435ce76fcf1fa562bd38f14bf6c303c70fb373d951b8a691ab793f12c0f3336d6191378bccaed32923bba81868148f029e3d5712a2fb9f610997549710716db37f7400690c8dfbed12ff0a683d8e4d0079b380e2fd856eeafb8c6eedfac8fb54dacd6bd8a96e9f8d23ea87252c1a7c2b53efc6e6aa1f0cc30fbaaf68ee7d46666afc15856669cd9baebf9397ff9f322cce5285e68a985f3b6aadce5e8f14e9f9dd16764bc4e9f62168aa265d8634ab706
ed40b0809023f141c36717bd6ccef9ec6aa6bfd2d00bda9375c2fee9ebba49590a166*1b0997cf64bb2c2ff88cb87bcacd9729d404bd46db18117c20d94e67c946fedc";
@@ -47,6 +46,8 @@ const char *module_st_pass        (MAYBE_UNUSED const hashconfig_t *hashconfig,
 
 typedef struct electrum
 {
+  secp256k1_t coords;
+
   u32 data_buf[4096];
   u32 data_len;
 
@@ -62,102 +63,8 @@ typedef struct electrum_tmp
 
 } electrum_tmp_t;
 
-typedef struct
-{
-  u32 ukey[8];
-
-  u32 pubkey[9]; // 32 + 1 bytes (for sign of the curve point)
-
-  u32 hook_success;
-
-} electrum_hook_t;
-
-typedef struct electrum_hook_salt
-{
-  u8 ephemeral_pubkey_raw[33];
-
-  secp256k1_pubkey ephemeral_pubkey_struct;
-
-} electrum_hook_salt_t;
-
 static const char *SIGNATURE_ELECTRUM = "$electrum$4*";
 
-#define M21700_MAX_ACCEL   16
-#define M21700_MAX_THREADS 64
-
-u32 module_kernel_accel_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_accel_max = (user_options->kernel_accel_chgd == true) ? user_options->kernel_accel : M21700_MAX_ACCEL;
-
-  return kernel_accel_max;
-}
-
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : M21700_MAX_THREADS;
-
-  return kernel_threads_max;
-}
-
-void module_hook23 (hc_device_param_t *device_param, const void *hook_salts_buf, const u32 salt_pos, const u64 pw_pos)
-{
-  electrum_hook_t *hook_items = (electrum_hook_t *) device_param->hooks_buf;
-
-  electrum_hook_salt_t *electrums = (electrum_hook_salt_t *) hook_salts_buf;
-  electrum_hook_salt_t *electrum  = &electrums[salt_pos];
-
-  // we need to copy it because the secp256k1_ec_pubkey_tweak_mul () function has side effects
-
-  secp256k1_pubkey ephemeral_pubkey = electrum->ephemeral_pubkey_struct; // shallow copy is safe !
-
-  // this hook data needs to be updated (the "hook_success" variable):
-
-  electrum_hook_t *hook_item = &hook_items[pw_pos];
-
-  hook_item->hook_success = 0;
-
-  u32 *hook_pubkey = hook_item->pubkey;
-
-  hook_pubkey[0] = hook_item->ukey[0];
-  hook_pubkey[1] = hook_item->ukey[1];
-  hook_pubkey[2] = hook_item->ukey[2];
-  hook_pubkey[3] = hook_item->ukey[3];
-  hook_pubkey[4] = hook_item->ukey[4];
-  hook_pubkey[5] = hook_item->ukey[5];
-  hook_pubkey[6] = hook_item->ukey[6];
-  hook_pubkey[7] = hook_item->ukey[7];
-  hook_pubkey[8] = 0;
-
-  /*
-   * Start with Elliptic Curve Cryptography (ECC)
-   */
-
-  const size_t length = 33; // NOT a bug (32 + 1 for the sign)
-
-  bool multiply_success = hc_secp256k1_pubkey_tweak_mul (&ephemeral_pubkey, (u8 *) hook_pubkey, length);
-
-  if (multiply_success == false) return;
-
-  // in this case hook_success set to 1 doesn't mean that we've cracked it, but just that there were
-  // no problems detected by secp256k1_ec_pubkey_tweak_mul ()
-
-  hook_item->hook_success = 1;
-}
-
-u64 module_hook_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u64 hook_size = (const u64) sizeof (electrum_hook_t);
-
-  return hook_size;
-}
-
-u64 module_hook_salt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u64 hook_salt_size = (const u64) sizeof (electrum_hook_salt_t);
-
-  return hook_salt_size;
-}
-
 u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
   const u64 esalt_size = (const u64) sizeof (electrum_t);
@@ -194,8 +101,6 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
 
   electrum_t *esalt = (electrum_t *) esalt_buf;
 
-  electrum_hook_salt_t *hook = (electrum_hook_salt_t *) hook_salt_buf;
-
   token_t token;
 
   token.token_cnt  = 4;
@@ -245,16 +150,20 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
 
   // ephemeral pubkey:
 
+  u32 ephemeral_pubkey[9] = { 0 };
+
+  u8 *ephemeral_pubkey_ptr = (u8 *) ephemeral_pubkey;
+
   for (u32 i = 0, j = 0; j < 66; i += 1, j += 2)
   {
-    hook->ephemeral_pubkey_raw[i] = hex_to_u8 (ephemeral_pos + j);
+    ephemeral_pubkey_ptr[i] = hex_to_u8 (ephemeral_pos + j);
   }
 
-  size_t length = 33;
+  secp256k1_t *coords = &esalt->coords;
 
-  bool parse_success = hc_secp256k1_pubkey_parse (&hook->ephemeral_pubkey_struct, hook->ephemeral_pubkey_raw, length);
+  u32 parse_success = parse_public (coords, ephemeral_pubkey);
 
-  if (parse_success == false) return (PARSER_SALT_VALUE);
+  if (parse_success != 0) return (PARSER_SALT_VALUE);
 
   // data buf:
 
@@ -296,17 +205,19 @@ int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
 
   electrum_t *esalt = (electrum_t *) esalt_buf;
 
-  electrum_hook_salt_t *hook = (electrum_hook_salt_t *) hook_salt_buf;
-
   // ephemeral pubkey:
 
   char ephemeral[66 + 1];
 
   memset (ephemeral, 0, sizeof (ephemeral));
 
-  for (u32 i = 0, j = 0; i < 33; i += 1, j += 2)
+  u8 type = 0x02 | (esalt->coords.xy[8] & 1); // odd or even y coordinate
+
+  snprintf (ephemeral, 66 + 1, "%02x", type);
+
+  for (int i = 31, j = 2; i >= 0; i -= 1, j += 2)
   {
-    const u8 *ptr = (const u8 *) hook->ephemeral_pubkey_raw;
+    const u8 *ptr = (const u8 *) esalt->coords.xy;
 
     snprintf (ephemeral + j, 66 + 1 - j, "%02x", ptr[i]);
   }
@@ -383,16 +294,16 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_hashes_count_max         = MODULE_DEFAULT;
   module_ctx->module_hlfmt_disable            = MODULE_DEFAULT;
   module_ctx->module_hook12                   = MODULE_DEFAULT;
-  module_ctx->module_hook23                   = module_hook23;
-  module_ctx->module_hook_salt_size           = module_hook_salt_size;
-  module_ctx->module_hook_size                = module_hook_size;
+  module_ctx->module_hook23                   = MODULE_DEFAULT;
+  module_ctx->module_hook_salt_size           = MODULE_DEFAULT;
+  module_ctx->module_hook_size                = MODULE_DEFAULT;
   module_ctx->module_jit_build_options        = module_jit_build_options;
   module_ctx->module_jit_cache_disable        = MODULE_DEFAULT;
-  module_ctx->module_kernel_accel_max         = module_kernel_accel_max;
+  module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
diff --git a/src/modules/module_21800.c b/src/modules/module_21800.c
index 30d3a4d1d..12ffbd834 100644
--- a/src/modules/module_21800.c
+++ b/src/modules/module_21800.c
@@ -10,11 +10,7 @@
 #include "convert.h"
 #include "shared.h"
 #include "memory.h"
-#include "emu_inc_hash_sha512.h"
-#include "emu_inc_hash_sha256.h"
-#include "emu_inc_cipher_aes.h"
-#include "ext_secp256k1.h"
-#include "zlib.h"
+#include "emu_inc_ecc_secp256k1.h"
 
 static const u32   ATTACK_EXEC    = ATTACK_EXEC_OUTSIDE_KERNEL;
 static const u32   DGST_POS0      = 0;
@@ -28,8 +24,7 @@ static const u64   KERN_TYPE      = 21800;
 static const u32   OPTI_TYPE      = OPTI_TYPE_ZERO_BYTE
                                   | OPTI_TYPE_USES_BITS_64
                                   | OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
-static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE
-                                  | OPTS_TYPE_HOOK23;
+static const u64   OPTS_TYPE      = OPTS_TYPE_PT_GENERATE_LE;
 static const u32   SALT_TYPE      = SALT_TYPE_EMBEDDED;
 static const char *ST_PASS        = "hashcat";
 static const char *ST_HASH        = "$electrum$5*02170fee7c35f1ef3b229edc90fbd0793b688a0d6f41137a97aab2343d315cce16*94cf72d8f5d774932b414a3344984859e43721268d2eb35fa531de5a2fc7024b463c730a54f4f46229dd9fede5034b19ac415c2916e9c16b02094f845795df0c397ff76d597886b1f9e014ad1a8f64a3f617d9900aa645b3ba86f16ce542251fc22c41d93fa6bc118be96d9582917e19d2a299743331804cfc7ce2c035367b4cbcfb70adfb1e10a0f2795769f2165d8fd13daa8b45eeac495b5b63e91a87f63b42e483f84a881e49adecacf6519cb564694b42dd9fe80fcbc6cdb63cf5ae33f35255266f5c2524dd93d3cc15eba0f2ccdc3c109cc2d7e8f711b8b440f168caf8b005e8bcdfe694148e94a04d2a738f09349a96600bd8e8edae793b26ebae231022f24e96cb158db141ac40400a9e9ef099e673cfe017281537c57f82fb45c62bdb64462235a6eefb594961d5eb2c46537958e4d04250804c6e9f343ab7a0db07af6b8a9d1a6c5cfcd311b8fb8383ac9ed9d98d427d526c2f517fc97473bd87cb59899bd0e8fb8c57fa0f7e0d53daa57c972cf92764af4b1725a5fb8f504b663ec519731929b3caaa793d8ee74293eee27d0e208a60e26290bc546e6fa9ed865076e13febfea249729218c1b5752e912055fbf993fbac5df2cca2b37c5e0f9c30789858ceeb3c482a8db123966775aeed2eee2fc34efb160d164929f51589bff748ca773f38978bff3508d5a7591fb2d2795df983504a788071f469d78c88fd7899cabbc5804f458653d0206b82771a59522e1fa794d7de1536c51a437f5d6df5efd6654678e5794ca429b5752e1103340ed80786f1e9da7f5b39af628b2212e4d88cd36b8a7136d50a6b6e275ab406ba7c57cc70d77d01c4c16e9363901164fa92dc9e9b99219d5376f24862e775968605001e71b000e2c7123b4b43f3ca40db17efd729388782e46e64d43ccb947db4eb1473ff1a3836b74fe312cd1a33b73b8b8d80c087088932277773c329f2f66a01d6b3fc1e651c56959ebbed7b14a21b977f3acdedf1a0d98d519a74b50c39b3052d840106da4145345d86ec0461cddafacc2a4f0dd646457ad05bf04dcbcc80516a5c5ed14d2d639a70e77b686f19cbfb63f546d81ae19cc8ba35cce3f3b5b9602df25b678e14411fecec87b8347f5047513df415c6b1a3d39871a6bcb0f67d9cf8311596deae45fd1d84a04fd58f1fd55c5156b7309af09094c99a53674809cb87a45f95a2d69f9997a38085519cb4e056f9efd56672a2c1fe927d5ea8eec25b8aff6e56f9a2310f1a481daf407b8adf16201da267c59973920fd21bb087b88123ef98709839d6a3ee34efb8ccd5c15ed0e46cff3172682769531164b
66c8689c35a26299dd26d09233d1f64f9667474141cf9c6a6de7f2bc52c3bb44cfe679ff4b912c06df406283836b3581773cb76d375304f46239da5996594a8d03b14c02f1b35a432dc44a96331242ae31174*33a7ee59d6d17ed1ee99dc0a71771227e6f3734b17ba36eb589bdced56244135";
@@ -49,6 +44,14 @@ u32         module_salt_type      (MAYBE_UNUSED const hashconfig_t *hashconfig,
 const char *module_st_hash        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH;         }
 const char *module_st_pass        (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS;         }
 
+typedef struct electrum // per-hash esalt; module_esalt_size () reports sizeof (electrum_t)
+{
+  secp256k1_t coords; // filled by parse_public () in module_hash_decode () from the 33-byte ephemeral pubkey
+
+  u32 data_buf[256]; // 256 * 4 = 1024 bytes; module_hash_encode () prints them as 2048 hex chars, words 0..3 also serve as the fake salt
+
+} electrum_t;
+
 typedef struct electrum_tmp
 {
   u64  ipad[8];
@@ -59,250 +62,13 @@ typedef struct electrum_tmp
 
 } electrum_tmp_t;
 
-typedef struct
-{
-  u32 ukey[8];
-
-  u32 hook_success;
-
-} electrum_hook_t;
-
-typedef struct electrum_hook_salt
-{
-  u32 data_buf[256];
-
-  u8 ephemeral_pubkey_raw[33];
-
-  secp256k1_pubkey ephemeral_pubkey_struct;
-
-} electrum_hook_salt_t;
-
 static const char *SIGNATURE_ELECTRUM = "$electrum$5*";
 
-#define M21800_MAX_ACCEL   16
-#define M21800_MAX_THREADS 64
-
-u32 module_kernel_accel_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_accel_max = (user_options->kernel_accel_chgd == true) ? user_options->kernel_accel : M21800_MAX_ACCEL;
-
-  return kernel_accel_max;
-}
-
-u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u32 kernel_threads_max = (user_options->kernel_threads_chgd == true) ? user_options->kernel_threads : M21800_MAX_THREADS;
-
-  return kernel_threads_max;
-}
-
-void module_hook23 (hc_device_param_t *device_param, const void *hook_salts_buf, const u32 salt_pos, const u64 pw_pos)
-{
-  electrum_hook_t *hook_items = (electrum_hook_t *) device_param->hooks_buf;
-
-  electrum_hook_salt_t *electrums = (electrum_hook_salt_t *) hook_salts_buf;
-  electrum_hook_salt_t *electrum  = &electrums[salt_pos];
-
-  u32 *data_buf = electrum->data_buf;
-
-  // we need to copy it because the secp256k1_ec_pubkey_tweak_mul () function has side effects
-
-  secp256k1_pubkey ephemeral_pubkey = electrum->ephemeral_pubkey_struct; // shallow copy is safe !
-
-  // this hook data needs to be updated (the "hook_success" variable):
-
-  electrum_hook_t *hook_item = &hook_items[pw_pos];
-
-  hook_item->hook_success = 0;
-
-  u32 ukey[9]; // (32 + 1) + 3 = 9 * 4 = 36 bytes (+1 for holding the "sign" of the curve point)
-
-  ukey[0] = hook_item->ukey[0];
-  ukey[1] = hook_item->ukey[1];
-  ukey[2] = hook_item->ukey[2];
-  ukey[3] = hook_item->ukey[3];
-  ukey[4] = hook_item->ukey[4];
-  ukey[5] = hook_item->ukey[5];
-  ukey[6] = hook_item->ukey[6];
-  ukey[7] = hook_item->ukey[7];
-  ukey[8] = 0;
-
-  /*
-   * Start with Elliptic Curve Cryptography (ECC)
-   */
-
-  u8 *tmp_buf = (u8 *) ukey;
-
-  const size_t length = 33; // NOT a bug (32 + 1 for the sign)
-
-  bool multiply_success = hc_secp256k1_pubkey_tweak_mul (&ephemeral_pubkey, tmp_buf, length);
-
-  if (multiply_success == false) return;
-
-  u32 input[64] = { 0 };
-
-  memcpy (input, tmp_buf, length);
-
-  sha512_ctx_t sha512_ctx;
-
-  sha512_init        (&sha512_ctx);
-  sha512_update_swap (&sha512_ctx, input, length);
-  sha512_final       (&sha512_ctx);
-
-  // ... now we have the result in sha512_ctx.h[0]...sha512_ctx.h[7]
-
-  u32 iv[4];
-
-  iv[0] = v32b_from_v64 (sha512_ctx.h[0]);
-  iv[1] = v32a_from_v64 (sha512_ctx.h[0]);
-  iv[2] = v32b_from_v64 (sha512_ctx.h[1]);
-  iv[3] = v32a_from_v64 (sha512_ctx.h[1]);
-
-  iv[0] = byte_swap_32 (iv[0]);
-  iv[1] = byte_swap_32 (iv[1]);
-  iv[2] = byte_swap_32 (iv[2]);
-  iv[3] = byte_swap_32 (iv[3]);
-
-  u32 key[4];
-
-  key[0] = v32b_from_v64 (sha512_ctx.h[2]);
-  key[1] = v32a_from_v64 (sha512_ctx.h[2]);
-  key[2] = v32b_from_v64 (sha512_ctx.h[3]);
-  key[3] = v32a_from_v64 (sha512_ctx.h[3]);
-
-  key[0] = byte_swap_32 (key[0]);
-  key[1] = byte_swap_32 (key[1]);
-  key[2] = byte_swap_32 (key[2]);
-  key[3] = byte_swap_32 (key[3]);
-
-  // init AES
-
-  AES_KEY aes_key;
-
-  memset (&aes_key, 0, sizeof (aes_key));
-
-  aes128_set_decrypt_key (aes_key.rdk, key, (u32 *) te0, (u32 *) te1, (u32 *) te2, (u32 *) te3, (u32 *) td0, (u32 *) td1, (u32 *) td2, (u32 *) td3);
-
-  int aes_len = 1024; // in my tests (very few) it also worked with only 128 input bytes !
-  // int aes_len = 128;
-
-  u32 data[4];
-  u32 out[4];
-
-  u32 out_full[256]; // 1024 / 4
-
-  // we need to run it at least once:
-
-  data[0] = data_buf[0];
-  data[1] = data_buf[1];
-  data[2] = data_buf[2];
-  data[3] = data_buf[3];
-
-  aes128_decrypt (aes_key.rdk, data, out, (u32 *) td0, (u32 *) td1, (u32 *) td2, (u32 *) td3, (u32 *) td4);
-
-  out[0] ^= iv[0];
-
-  // early reject
-
-  if ((out[0] & 0x0007ffff) != 0x00059c78) return;
-
-  out[1] ^= iv[1];
-  out[2] ^= iv[2];
-  out[3] ^= iv[3];
-
-  out_full[0] = out[0];
-  out_full[1] = out[1];
-  out_full[2] = out[2];
-  out_full[3] = out[3];
-
-  iv[0] = data[0];
-  iv[1] = data[1];
-  iv[2] = data[2];
-  iv[3] = data[3];
-
-  // for aes_len > 16 we need to loop
-
-  for (int i = 16, j = 4; i < aes_len; i += 16, j += 4)
-  {
-    data[0] = data_buf[j + 0];
-    data[1] = data_buf[j + 1];
-    data[2] = data_buf[j + 2];
-    data[3] = data_buf[j + 3];
-
-    aes128_decrypt (aes_key.rdk, data, out, (u32 *) td0, (u32 *) td1, (u32 *) td2, (u32 *) td3, (u32 *) td4);
-
-    out[0] ^= iv[0];
-    out[1] ^= iv[1];
-    out[2] ^= iv[2];
-    out[3] ^= iv[3];
-
-    iv[0] = data[0];
-    iv[1] = data[1];
-    iv[2] = data[2];
-    iv[3] = data[3];
-
-    out_full[j + 0] = out[0];
-    out_full[j + 1] = out[1];
-    out_full[j + 2] = out[2];
-    out_full[j + 3] = out[3];
-  }
-
-  // decompress with zlib:
-
-  size_t  compressed_data_len   = aes_len;
-  u8     *compressed_data       = (u8 *) out_full;
-
-  size_t  decompressed_data_len = 16; // we do NOT need more than the first bytes for validation
-  u8     *decompressed_data     = (unsigned char *) hcmalloc (decompressed_data_len);
-
-  z_stream inf;
-
-  inf.zalloc = Z_NULL;
-  inf.zfree  = Z_NULL;
-  inf.opaque = Z_NULL;
-
-  inf.next_in   = compressed_data;
-  inf.avail_in  = compressed_data_len;
-
-  inf.next_out  = decompressed_data;
-  inf.avail_out = decompressed_data_len;
-
-  // inflate:
-
-  inflateInit2 (&inf, MAX_WBITS);
-
-  int zlib_ret = inflate (&inf, Z_NO_FLUSH);
-
-  inflateEnd (&inf);
-
-  if ((zlib_ret != Z_OK) && (zlib_ret != Z_STREAM_END))
-  {
-    hcfree (decompressed_data);
-
-    return;
-  }
-
-  if ((memcmp (decompressed_data, "{\n    \"",   7) == 0) ||
-      (memcmp (decompressed_data, "{\r\n    \"", 8) == 0))
-  {
-    hook_item->hook_success = 1;
-  }
-
-  hcfree (decompressed_data);
-}
-
-u64 module_hook_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
-{
-  const u64 hook_size = (const u64) sizeof (electrum_hook_t);
-
-  return hook_size;
-}
-
-u64 module_hook_salt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
+u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
 {
-  const u64 hook_salt_size = (const u64) sizeof (electrum_hook_salt_t);
+  const u64 esalt_size = (const u64) sizeof (electrum_t); // electrum_t holds coords + data_buf, replacing the removed electrum_hook_salt_t
 
-  return hook_salt_size;
+  return esalt_size;
 }
 
 u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
@@ -332,7 +98,7 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
 {
   u32 *digest = (u32 *) digest_buf;
 
-  electrum_hook_salt_t *electrum = (electrum_hook_salt_t *) hook_salt_buf;
+  electrum_t *esalt = (electrum_t *) esalt_buf;
 
   token_t token;
 
@@ -377,20 +143,24 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
 
   // ephemeral pubkey:
 
+  u32 ephemeral_pubkey[9] = { 0 };
+
+  u8 *ephemeral_pubkey_ptr = (u8 *) ephemeral_pubkey;
+
   for (u32 i = 0, j = 0; j < 66; i += 1, j += 2)
   {
-    electrum->ephemeral_pubkey_raw[i] = hex_to_u8 (ephemeral_pos + j);
+    ephemeral_pubkey_ptr[i] = hex_to_u8 (ephemeral_pos + j);
   }
 
-  size_t length = 33;
+  secp256k1_t *coords = &esalt->coords;
 
-  bool parse_success = hc_secp256k1_pubkey_parse (&electrum->ephemeral_pubkey_struct, electrum->ephemeral_pubkey_raw, length);
+  u32 parse_success = parse_public (coords, ephemeral_pubkey);
 
-  if (parse_success == false) return (PARSER_SALT_VALUE);
+  if (parse_success != 0) return (PARSER_SALT_VALUE);
 
   // data buf:
 
-  u8* data_buf_ptr = (u8 *) electrum->data_buf;
+  u8* data_buf_ptr = (u8 *) esalt->data_buf;
 
   for (u32 i = 0, j = 0; j < 2048; i += 1, j += 2)
   {
@@ -408,10 +178,10 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
 
   // fake salt
 
-  salt->salt_buf[0] = electrum->data_buf[0];
-  salt->salt_buf[1] = electrum->data_buf[1];
-  salt->salt_buf[2] = electrum->data_buf[2];
-  salt->salt_buf[3] = electrum->data_buf[3];
+  salt->salt_buf[0] = esalt->data_buf[0];
+  salt->salt_buf[1] = esalt->data_buf[1];
+  salt->salt_buf[2] = esalt->data_buf[2];
+  salt->salt_buf[3] = esalt->data_buf[3];
 
   salt->salt_len = 16;
 
@@ -424,7 +194,7 @@ int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
 {
   u32 *digest = (u32 *) digest_buf;
 
-  electrum_hook_salt_t *electrum = (electrum_hook_salt_t *) hook_salt_buf;
+  electrum_t *esalt = (electrum_t *) esalt_buf;
 
   // ephemeral pubkey:
 
@@ -432,9 +202,13 @@ int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
 
   memset (ephemeral, 0, sizeof (ephemeral));
 
-  for (u32 i = 0, j = 0; i < 33; i += 1, j += 2)
+  u8 type = 0x02 | (esalt->coords.xy[8] & 1); // odd or even y coordinate
+
+  snprintf (ephemeral, 66 + 1, "%02x", type);
+
+  for (int i = 31, j = 2; i >= 0; i -= 1, j += 2)
   {
-    const u8 *ptr = (const u8 *) electrum->ephemeral_pubkey_raw;
+    const u8 *ptr = (const u8 *) esalt->coords.xy;
 
     snprintf (ephemeral + j, 66 + 1 - j, "%02x", ptr[i]);
   }
@@ -447,7 +221,7 @@ int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE
 
   for (u32 i = 0, j = 0; i < 1024; i += 1, j += 2)
   {
-    const u8 *ptr = (const u8 *) electrum->data_buf;
+    const u8 *ptr = (const u8 *) esalt->data_buf;
 
     snprintf (data_buf + j, 2048 + 1 - j, "%02x", ptr[i]);
   }
@@ -490,7 +264,7 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_dgst_pos3                = module_dgst_pos3;
   module_ctx->module_dgst_size                = module_dgst_size;
   module_ctx->module_dictstat_disable         = MODULE_DEFAULT;
-  module_ctx->module_esalt_size               = MODULE_DEFAULT;
+  module_ctx->module_esalt_size               = module_esalt_size;
   module_ctx->module_extra_buffer_size        = MODULE_DEFAULT;
   module_ctx->module_extra_tmp_size           = MODULE_DEFAULT;
   module_ctx->module_forced_outfile_format    = MODULE_DEFAULT;
@@ -511,16 +285,16 @@ void module_init (module_ctx_t *module_ctx)
   module_ctx->module_hashes_count_max         = MODULE_DEFAULT;
   module_ctx->module_hlfmt_disable            = MODULE_DEFAULT;
   module_ctx->module_hook12                   = MODULE_DEFAULT;
-  module_ctx->module_hook23                   = module_hook23;
-  module_ctx->module_hook_salt_size           = module_hook_salt_size;
-  module_ctx->module_hook_size                = module_hook_size;
+  module_ctx->module_hook23                   = MODULE_DEFAULT;
+  module_ctx->module_hook_salt_size           = MODULE_DEFAULT;
+  module_ctx->module_hook_size                = MODULE_DEFAULT;
   module_ctx->module_jit_build_options        = module_jit_build_options;
   module_ctx->module_jit_cache_disable        = MODULE_DEFAULT;
-  module_ctx->module_kernel_accel_max         = module_kernel_accel_max;
+  module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
   module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
-  module_ctx->module_kernel_threads_max       = module_kernel_threads_max;
+  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
   module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
   module_ctx->module_kern_type                = module_kern_type;
   module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;