/**
 * Author......: See docs/credits.txt
 * License.....: MIT
 */

//#define NEW_SIMD_CODE

#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_rp.h)
#include M2S(INCLUDE_PATH/inc_rp.cl)
#include M2S(INCLUDE_PATH/inc_scalar.cl)
#include M2S(INCLUDE_PATH/inc_hash_sha1.cl)
#include M2S(INCLUDE_PATH/inc_cipher_des.cl)
#endif

/**
 * Per-hash extra salt data for this mode: four 32-bit words of ciphertext.
 *
 * The kernels in this file read ct_buf[0..1] as the first 8-byte block and
 * ct_buf[2..3] as the second 8-byte block handed to the DES routines.
 */
typedef struct mozilla_3des
{
  u32 ct_buf[4];

} mozilla_3des_t;

/**
 * Rule-based kernel that ends every successful candidate with
 * COMPARE_M_SCALAR.
 *
 * For each rule (il_pos) applied to this work-item's base password it:
 *   1. derives hp / chp with two SHA-1 computations,
 *   2. derives k1, tk and k2 with HMAC-SHA1 keyed by chp,
 *   3. builds three DES key schedules and an IV from k1 / k2,
 *   4. decrypts the two ciphertext blocks and compares the result against
 *      the fixed markers "password" and "-check\x02\x02",
 *   5. on a match, passes the four ciphertext words to COMPARE_M_SCALAR.
 *
 * The kernel parameters come from KERN_ATTR_RULES_ESALT, which is defined
 * outside this file.
 */
KERNEL_FQ void m26000_mxx (KERN_ATTR_RULES_ESALT (mozilla_3des_t))
{
  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * DES lookup tables (c_SPtrans / c_skb), shared
   */

  #ifdef REAL_SHM

  LOCAL_VK u32 s_SPtrans[8][64];
  LOCAL_VK u32 s_skb[8][64];

  // Cooperative copy: each work-item copies columns lid, lid + lsz, ...
  // of all eight rows of both tables.
  for (u32 i = lid; i < 64; i += lsz)
  {
    s_SPtrans[0][i] = c_SPtrans[0][i];
    s_SPtrans[1][i] = c_SPtrans[1][i];
    s_SPtrans[2][i] = c_SPtrans[2][i];
    s_SPtrans[3][i] = c_SPtrans[3][i];
    s_SPtrans[4][i] = c_SPtrans[4][i];
    s_SPtrans[5][i] = c_SPtrans[5][i];
    s_SPtrans[6][i] = c_SPtrans[6][i];
    s_SPtrans[7][i] = c_SPtrans[7][i];

    s_skb[0][i] = c_skb[0][i];
    s_skb[1][i] = c_skb[1][i];
    s_skb[2][i] = c_skb[2][i];
    s_skb[3][i] = c_skb[3][i];
    s_skb[4][i] = c_skb[4][i];
    s_skb[5][i] = c_skb[5][i];
    s_skb[6][i] = c_skb[6][i];
    s_skb[7][i] = c_skb[7][i];
  }

  SYNC_THREADS ();

  #else

  // No local copy: use the constant tables directly under the same names.
  CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans;
  CONSTANT_AS u32a (*s_skb)[64]     = c_skb;

  #endif

  // The bounds check is placed after the table copy and SYNC_THREADS, so
  // out-of-range work-items still take part in both before returning.
  if (gid >= GID_CNT) return;

  /**
   * base
   */

  // COPY_PW / PASTE_PW are macros defined elsewhere; presumably COPY_PW
  // stages pws[gid] so PASTE_PW can yield a fresh copy per rule - confirm.
  COPY_PW (pws[gid]);

  // Salt words 0..4, byte-swapped; used below as the first 20 bytes of the
  // first SHA-1 input ($global_salt_bin in the reference comments).
  u32 gs_buf[5];

  gs_buf[0] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[ 0]);
  gs_buf[1] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[ 1]);
  gs_buf[2] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[ 2]);
  gs_buf[3] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[ 3]);
  gs_buf[4] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[ 4]);

  // Salt words 8..12, byte-swapped; used below wherever the reference
  // comments mention $entry_salt_bin (and also in place of $pes).
  u32 es_buf[5];

  es_buf[0] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[ 8]);
  es_buf[1] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[ 9]);
  es_buf[2] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[10]);
  es_buf[3] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[11]);
  es_buf[4] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[12]);

  // First 8-byte ciphertext block.
  u32 ct_buf0[2];

  ct_buf0[0] = esalt_bufs[DIGESTS_OFFSET_HOST].ct_buf[0];
  ct_buf0[1] = esalt_bufs[DIGESTS_OFFSET_HOST].ct_buf[1];

  // Second 8-byte ciphertext block.
  u32 ct_buf1[2];

  ct_buf1[0] = esalt_bufs[DIGESTS_OFFSET_HOST].ct_buf[2];
  ct_buf1[1] = esalt_bufs[DIGESTS_OFFSET_HOST].ct_buf[3];

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    pw_t tmp = PASTE_PW;

    // apply_rules edits tmp.i and returns the new candidate length.
    tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);

    // my $hp = sha1 ($global_salt_bin . $word);

    sha1_ctx_t ctx0;

    sha1_init (&ctx0);

    // Preload the five salt words into the context buffer instead of
    // feeding them through an update call.
    ctx0.w0[0] = gs_buf[0];
    ctx0.w0[1] = gs_buf[1];
    ctx0.w0[2] = gs_buf[2];
    ctx0.w0[3] = gs_buf[3];
    ctx0.w1[0] = gs_buf[4];

    // 5 words * 4 bytes already buffered.
    ctx0.len = 20;

    // NOTE(review): the _swap variant presumably byte-swaps the candidate
    // words while appending them - confirm in inc_hash_sha1.cl.
    sha1_update_swap (&ctx0, tmp.i, tmp.pw_len);

    sha1_final (&ctx0);

    u32 hp[5];

    hp[0] = ctx0.h[0];
    hp[1] = ctx0.h[1];
    hp[2] = ctx0.h[2];
    hp[3] = ctx0.h[3];
    hp[4] = ctx0.h[4];

    // my $chp = sha1 ($hp . $entry_salt_bin);

    sha1_init (&ctx0);

    // The whole 40-byte message (hp followed by es_buf) is placed in the
    // context buffer by hand; only sha1_final is needed afterwards.
    ctx0.w0[0] = hp[0];
    ctx0.w0[1] = hp[1];
    ctx0.w0[2] = hp[2];
    ctx0.w0[3] = hp[3];
    ctx0.w1[0] = hp[4];
    ctx0.w1[1] = es_buf[0];
    ctx0.w1[2] = es_buf[1];
    ctx0.w1[3] = es_buf[2];
    ctx0.w2[0] = es_buf[3];
    ctx0.w2[1] = es_buf[4];

    ctx0.len = 40;

    sha1_final (&ctx0);

    u32 chp[5];

    chp[0] = ctx0.h[0];
    chp[1] = ctx0.h[1];
    chp[2] = ctx0.h[2];
    chp[3] = ctx0.h[3];
    chp[4] = ctx0.h[4];

    // my $k1 = hmac ($pes . $entry_salt_bin, $chp, \&sha1, 64);

    // ctx1 is initialised once with chp (zero-padded to 64 bytes) as the
    // HMAC key, then copied for each of the three messages below.
    sha1_hmac_ctx_t ctx1;

    u32 w0[4];
    u32 w1[4];
    u32 w2[4];
    u32 w3[4];

    w0[0] = chp[0];
    w0[1] = chp[1];
    w0[2] = chp[2];
    w0[3] = chp[3];
    w1[0] = chp[4];
    w1[1] = 0;
    w1[2] = 0;
    w1[3] = 0;
    w2[0] = 0;
    w2[1] = 0;
    w2[2] = 0;
    w2[3] = 0;
    w3[0] = 0;
    w3[1] = 0;
    w3[2] = 0;
    w3[3] = 0;

    sha1_hmac_init_64 (&ctx1, w0, w1, w2, w3);

    sha1_hmac_ctx_t ctx1a = ctx1;

    // Message for k1: es_buf twice in a row (40 bytes); es_buf stands in
    // for both $pes and $entry_salt_bin.
    w0[0] = es_buf[0];
    w0[1] = es_buf[1];
    w0[2] = es_buf[2];
    w0[3] = es_buf[3];
    w1[0] = es_buf[4];
    w1[1] = es_buf[0];
    w1[2] = es_buf[1];
    w1[3] = es_buf[2];
    w2[0] = es_buf[3];
    w2[1] = es_buf[4];
    w2[2] = 0;
    w2[3] = 0;
    w3[0] = 0;
    w3[1] = 0;
    w3[2] = 0;
    w3[3] = 0;

    sha1_hmac_update_64 (&ctx1a, w0, w1, w2, w3, 40);

    sha1_hmac_final (&ctx1a);

    // The HMAC result is read from the outer (opad) context state.
    u32 k1[5];

    k1[0] = ctx1a.opad.h[0];
    k1[1] = ctx1a.opad.h[1];
    k1[2] = ctx1a.opad.h[2];
    k1[3] = ctx1a.opad.h[3];
    k1[4] = ctx1a.opad.h[4];

    // my $tk = hmac ($pes, $chp, \&sha1, 64);

    sha1_hmac_ctx_t ctx1b = ctx1;

    // Message for tk: es_buf once (20 bytes).
    w0[0] = es_buf[0];
    w0[1] = es_buf[1];
    w0[2] = es_buf[2];
    w0[3] = es_buf[3];
    w1[0] = es_buf[4];
    w1[1] = 0;
    w1[2] = 0;
    w1[3] = 0;
    w2[0] = 0;
    w2[1] = 0;
    w2[2] = 0;
    w2[3] = 0;
    w3[0] = 0;
    w3[1] = 0;
    w3[2] = 0;
    w3[3] = 0;

    sha1_hmac_update_64 (&ctx1b, w0, w1, w2, w3, 20);

    sha1_hmac_final (&ctx1b);

    u32 tk[5];

    tk[0] = ctx1b.opad.h[0];
    tk[1] = ctx1b.opad.h[1];
    tk[2] = ctx1b.opad.h[2];
    tk[3] = ctx1b.opad.h[3];
    tk[4] = ctx1b.opad.h[4];

    // my $k2 = hmac ($tk . $entry_salt_bin, $chp, \&sha1, 64);

    sha1_hmac_ctx_t ctx1c = ctx1;

    // Message for k2: tk followed by es_buf (40 bytes).
    w0[0] = tk[0];
    w0[1] = tk[1];
    w0[2] = tk[2];
    w0[3] = tk[3];
    w1[0] = tk[4];
    w1[1] = es_buf[0];
    w1[2] = es_buf[1];
    w1[3] = es_buf[2];
    w2[0] = es_buf[3];
    w2[1] = es_buf[4];
    w2[2] = 0;
    w2[3] = 0;
    w3[0] = 0;
    w3[1] = 0;
    w3[2] = 0;
    w3[3] = 0;

    sha1_hmac_update_64 (&ctx1c, w0, w1, w2, w3, 40);

    sha1_hmac_final (&ctx1c);

    u32 k2[5];

    k2[0] = ctx1c.opad.h[0];
    k2[1] = ctx1c.opad.h[1];
    k2[2] = ctx1c.opad.h[2];
    k2[3] = ctx1c.opad.h[3];
    k2[4] = ctx1c.opad.h[4];

    // 3DES

    // 24 bytes of key material: all of k1 plus the first word of k2,
    // byte-swapped back before the DES key setup.
    u32 ukey[6];

    ukey[0] = hc_swap32_S (k1[0]);
    ukey[1] = hc_swap32_S (k1[1]);
    ukey[2] = hc_swap32_S (k1[2]);
    ukey[3] = hc_swap32_S (k1[3]);
    ukey[4] = hc_swap32_S (k1[4]);
    ukey[5] = hc_swap32_S (k2[0]);

    // IV: the last two words of k2 (k2[1] and k2[2] are not used).
    u32 iv[2];

    iv[0] = hc_swap32_S (k2[3]);
    iv[1] = hc_swap32_S (k2[4]);

    // One pair of 16-entry schedule arrays per 8-byte DES key.
    u32 K0[16];
    u32 K1[16];
    u32 K2[16];
    u32 K3[16];
    u32 K4[16];
    u32 K5[16];

    _des_crypt_keysetup (ukey[0], ukey[1], K0, K1, s_skb);
    _des_crypt_keysetup (ukey[2], ukey[3], K2, K3, s_skb);
    _des_crypt_keysetup (ukey[4], ukey[5], K4, K5, s_skb);

    u32 ct[2];
    u32 pt[2];

    u32 t1[2];
    u32 t2[2];

    ct[0] = ct_buf0[0];
    ct[1] = ct_buf0[1];

    // Decrypt with the third key, encrypt with the second, decrypt with
    // the first, then XOR the result with the IV.
    _des_crypt_decrypt (t1, ct, K4, K5, s_SPtrans);
    _des_crypt_encrypt (t2, t1, K2, K3, s_SPtrans);
    _des_crypt_decrypt (pt, t2, K0, K1, s_SPtrans);

    pt[0] ^= iv[0];
    pt[1] ^= iv[1];

    // password
    // (the two constants are "pass" and "word" as little-endian words;
    //  rejecting here skips the second block decryption for wrong keys)

    if (pt[0] != 0x73736170) continue;
    if (pt[1] != 0x64726f77) continue;

    // For the second block the previous ciphertext block takes the place
    // of the IV.
    iv[0] = ct_buf0[0];
    iv[1] = ct_buf0[1];

    ct[0] = ct_buf1[0];
    ct[1] = ct_buf1[1];

    _des_crypt_decrypt (t1, ct, K4, K5, s_SPtrans);
    _des_crypt_encrypt (t2, t1, K2, K3, s_SPtrans);
    _des_crypt_decrypt (pt, t2, K0, K1, s_SPtrans);

    pt[0] ^= iv[0];
    pt[1] ^= iv[1];

    // -check\x02\x02
    // (the two constants are "-che" and "ck\x02\x02" as little-endian words)

    if (pt[0] != 0x6568632d) continue;
    if (pt[1] != 0x02026b63) continue;

    // Both plaintext blocks matched: hand the four ciphertext words to the
    // comparison macro.
    // NOTE(review): COMPARE_M_SCALAR is defined elsewhere and presumably
    // relies on local names such as gid and il_pos - confirm before renaming.
    const u32 r0 = ct_buf0[0];
    const u32 r1 = ct_buf0[1];
    const u32 r2 = ct_buf1[0];
    const u32 r3 = ct_buf1[1];

    COMPARE_M_SCALAR (r0, r1, r2, r3);
  }
}

/**
 * Rule-based kernel that ends every successful candidate with
 * COMPARE_S_SCALAR; it additionally loads one target digest into search[].
 *
 * For each rule (il_pos) applied to this work-item's base password it:
 *   1. derives hp / chp with two SHA-1 computations,
 *   2. derives k1, tk and k2 with HMAC-SHA1 keyed by chp,
 *   3. builds three DES key schedules and an IV from k1 / k2,
 *   4. decrypts the two ciphertext blocks and compares the result against
 *      the fixed markers "password" and "-check\x02\x02",
 *   5. on a match, passes the four ciphertext words to COMPARE_S_SCALAR.
 *
 * The kernel parameters come from KERN_ATTR_RULES_ESALT, which is defined
 * outside this file.
 */
KERNEL_FQ void m26000_sxx (KERN_ATTR_RULES_ESALT (mozilla_3des_t))
{
  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * DES lookup tables (c_SPtrans / c_skb), shared
   */

  #ifdef REAL_SHM

  LOCAL_VK u32 s_SPtrans[8][64];
  LOCAL_VK u32 s_skb[8][64];

  // Cooperative copy: each work-item copies columns lid, lid + lsz, ...
  // of all eight rows of both tables.
  for (u32 i = lid; i < 64; i += lsz)
  {
    s_SPtrans[0][i] = c_SPtrans[0][i];
    s_SPtrans[1][i] = c_SPtrans[1][i];
    s_SPtrans[2][i] = c_SPtrans[2][i];
    s_SPtrans[3][i] = c_SPtrans[3][i];
    s_SPtrans[4][i] = c_SPtrans[4][i];
    s_SPtrans[5][i] = c_SPtrans[5][i];
    s_SPtrans[6][i] = c_SPtrans[6][i];
    s_SPtrans[7][i] = c_SPtrans[7][i];

    s_skb[0][i] = c_skb[0][i];
    s_skb[1][i] = c_skb[1][i];
    s_skb[2][i] = c_skb[2][i];
    s_skb[3][i] = c_skb[3][i];
    s_skb[4][i] = c_skb[4][i];
    s_skb[5][i] = c_skb[5][i];
    s_skb[6][i] = c_skb[6][i];
    s_skb[7][i] = c_skb[7][i];
  }

  SYNC_THREADS ();

  #else

  // No local copy: use the constant tables directly under the same names.
  CONSTANT_AS u32a (*s_SPtrans)[64] = c_SPtrans;
  CONSTANT_AS u32a (*s_skb)[64]     = c_skb;

  #endif

  // The bounds check is placed after the table copy and SYNC_THREADS, so
  // out-of-range work-items still take part in both before returning.
  if (gid >= GID_CNT) return;

  /**
   * digest
   */

  // NOTE(review): search[] is not referenced by name anywhere else in this
  // kernel; it is presumably consumed by COMPARE_S_SCALAR - confirm before
  // renaming or removing it.
  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
  };

  /**
   * base
   */

  // COPY_PW / PASTE_PW are macros defined elsewhere; presumably COPY_PW
  // stages pws[gid] so PASTE_PW can yield a fresh copy per rule - confirm.
  COPY_PW (pws[gid]);

  // Salt words 0..4, byte-swapped; used below as the first 20 bytes of the
  // first SHA-1 input ($global_salt_bin in the reference comments).
  u32 gs_buf[5];

  gs_buf[0] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[ 0]);
  gs_buf[1] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[ 1]);
  gs_buf[2] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[ 2]);
  gs_buf[3] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[ 3]);
  gs_buf[4] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[ 4]);

  // Salt words 8..12, byte-swapped; used below wherever the reference
  // comments mention $entry_salt_bin (and also in place of $pes).
  u32 es_buf[5];

  es_buf[0] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[ 8]);
  es_buf[1] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[ 9]);
  es_buf[2] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[10]);
  es_buf[3] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[11]);
  es_buf[4] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[12]);

  // First 8-byte ciphertext block.
  u32 ct_buf0[2];

  ct_buf0[0] = esalt_bufs[DIGESTS_OFFSET_HOST].ct_buf[0];
  ct_buf0[1] = esalt_bufs[DIGESTS_OFFSET_HOST].ct_buf[1];

  // Second 8-byte ciphertext block.
  u32 ct_buf1[2];

  ct_buf1[0] = esalt_bufs[DIGESTS_OFFSET_HOST].ct_buf[2];
  ct_buf1[1] = esalt_bufs[DIGESTS_OFFSET_HOST].ct_buf[3];

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    pw_t tmp = PASTE_PW;

    // apply_rules edits tmp.i and returns the new candidate length.
    tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);

    // my $hp = sha1 ($global_salt_bin . $word);

    sha1_ctx_t ctx0;

    sha1_init (&ctx0);

    // Preload the five salt words into the context buffer instead of
    // feeding them through an update call.
    ctx0.w0[0] = gs_buf[0];
    ctx0.w0[1] = gs_buf[1];
    ctx0.w0[2] = gs_buf[2];
    ctx0.w0[3] = gs_buf[3];
    ctx0.w1[0] = gs_buf[4];

    // 5 words * 4 bytes already buffered.
    ctx0.len = 20;

    // NOTE(review): the _swap variant presumably byte-swaps the candidate
    // words while appending them - confirm in inc_hash_sha1.cl.
    sha1_update_swap (&ctx0, tmp.i, tmp.pw_len);

    sha1_final (&ctx0);

    u32 hp[5];

    hp[0] = ctx0.h[0];
    hp[1] = ctx0.h[1];
    hp[2] = ctx0.h[2];
    hp[3] = ctx0.h[3];
    hp[4] = ctx0.h[4];

    // my $chp = sha1 ($hp . $entry_salt_bin);

    sha1_init (&ctx0);

    // The whole 40-byte message (hp followed by es_buf) is placed in the
    // context buffer by hand; only sha1_final is needed afterwards.
    ctx0.w0[0] = hp[0];
    ctx0.w0[1] = hp[1];
    ctx0.w0[2] = hp[2];
    ctx0.w0[3] = hp[3];
    ctx0.w1[0] = hp[4];
    ctx0.w1[1] = es_buf[0];
    ctx0.w1[2] = es_buf[1];
    ctx0.w1[3] = es_buf[2];
    ctx0.w2[0] = es_buf[3];
    ctx0.w2[1] = es_buf[4];

    ctx0.len = 40;

    sha1_final (&ctx0);

    u32 chp[5];

    chp[0] = ctx0.h[0];
    chp[1] = ctx0.h[1];
    chp[2] = ctx0.h[2];
    chp[3] = ctx0.h[3];
    chp[4] = ctx0.h[4];

    // my $k1 = hmac ($pes . $entry_salt_bin, $chp, \&sha1, 64);

    // ctx1 is initialised once with chp (zero-padded to 64 bytes) as the
    // HMAC key, then copied for each of the three messages below.
    sha1_hmac_ctx_t ctx1;

    u32 w0[4];
    u32 w1[4];
    u32 w2[4];
    u32 w3[4];

    w0[0] = chp[0];
    w0[1] = chp[1];
    w0[2] = chp[2];
    w0[3] = chp[3];
    w1[0] = chp[4];
    w1[1] = 0;
    w1[2] = 0;
    w1[3] = 0;
    w2[0] = 0;
    w2[1] = 0;
    w2[2] = 0;
    w2[3] = 0;
    w3[0] = 0;
    w3[1] = 0;
    w3[2] = 0;
    w3[3] = 0;

    sha1_hmac_init_64 (&ctx1, w0, w1, w2, w3);

    sha1_hmac_ctx_t ctx1a = ctx1;

    // Message for k1: es_buf twice in a row (40 bytes); es_buf stands in
    // for both $pes and $entry_salt_bin.
    w0[0] = es_buf[0];
    w0[1] = es_buf[1];
    w0[2] = es_buf[2];
    w0[3] = es_buf[3];
    w1[0] = es_buf[4];
    w1[1] = es_buf[0];
    w1[2] = es_buf[1];
    w1[3] = es_buf[2];
    w2[0] = es_buf[3];
    w2[1] = es_buf[4];
    w2[2] = 0;
    w2[3] = 0;
    w3[0] = 0;
    w3[1] = 0;
    w3[2] = 0;
    w3[3] = 0;

    sha1_hmac_update_64 (&ctx1a, w0, w1, w2, w3, 40);

    sha1_hmac_final (&ctx1a);

    // The HMAC result is read from the outer (opad) context state.
    u32 k1[5];

    k1[0] = ctx1a.opad.h[0];
    k1[1] = ctx1a.opad.h[1];
    k1[2] = ctx1a.opad.h[2];
    k1[3] = ctx1a.opad.h[3];
    k1[4] = ctx1a.opad.h[4];

    // my $tk = hmac ($pes, $chp, \&sha1, 64);

    sha1_hmac_ctx_t ctx1b = ctx1;

    // Message for tk: es_buf once (20 bytes).
    w0[0] = es_buf[0];
    w0[1] = es_buf[1];
    w0[2] = es_buf[2];
    w0[3] = es_buf[3];
    w1[0] = es_buf[4];
    w1[1] = 0;
    w1[2] = 0;
    w1[3] = 0;
    w2[0] = 0;
    w2[1] = 0;
    w2[2] = 0;
    w2[3] = 0;
    w3[0] = 0;
    w3[1] = 0;
    w3[2] = 0;
    w3[3] = 0;

    sha1_hmac_update_64 (&ctx1b, w0, w1, w2, w3, 20);

    sha1_hmac_final (&ctx1b);

    u32 tk[5];

    tk[0] = ctx1b.opad.h[0];
    tk[1] = ctx1b.opad.h[1];
    tk[2] = ctx1b.opad.h[2];
    tk[3] = ctx1b.opad.h[3];
    tk[4] = ctx1b.opad.h[4];

    // my $k2 = hmac ($tk . $entry_salt_bin, $chp, \&sha1, 64);

    sha1_hmac_ctx_t ctx1c = ctx1;

    // Message for k2: tk followed by es_buf (40 bytes).
    w0[0] = tk[0];
    w0[1] = tk[1];
    w0[2] = tk[2];
    w0[3] = tk[3];
    w1[0] = tk[4];
    w1[1] = es_buf[0];
    w1[2] = es_buf[1];
    w1[3] = es_buf[2];
    w2[0] = es_buf[3];
    w2[1] = es_buf[4];
    w2[2] = 0;
    w2[3] = 0;
    w3[0] = 0;
    w3[1] = 0;
    w3[2] = 0;
    w3[3] = 0;

    sha1_hmac_update_64 (&ctx1c, w0, w1, w2, w3, 40);

    sha1_hmac_final (&ctx1c);

    u32 k2[5];

    k2[0] = ctx1c.opad.h[0];
    k2[1] = ctx1c.opad.h[1];
    k2[2] = ctx1c.opad.h[2];
    k2[3] = ctx1c.opad.h[3];
    k2[4] = ctx1c.opad.h[4];

    // 3DES

    // 24 bytes of key material: all of k1 plus the first word of k2,
    // byte-swapped back before the DES key setup.
    u32 ukey[6];

    ukey[0] = hc_swap32_S (k1[0]);
    ukey[1] = hc_swap32_S (k1[1]);
    ukey[2] = hc_swap32_S (k1[2]);
    ukey[3] = hc_swap32_S (k1[3]);
    ukey[4] = hc_swap32_S (k1[4]);
    ukey[5] = hc_swap32_S (k2[0]);

    // IV: the last two words of k2 (k2[1] and k2[2] are not used).
    u32 iv[2];

    iv[0] = hc_swap32_S (k2[3]);
    iv[1] = hc_swap32_S (k2[4]);

    // One pair of 16-entry schedule arrays per 8-byte DES key.
    u32 K0[16];
    u32 K1[16];
    u32 K2[16];
    u32 K3[16];
    u32 K4[16];
    u32 K5[16];

    _des_crypt_keysetup (ukey[0], ukey[1], K0, K1, s_skb);
    _des_crypt_keysetup (ukey[2], ukey[3], K2, K3, s_skb);
    _des_crypt_keysetup (ukey[4], ukey[5], K4, K5, s_skb);

    u32 ct[2];
    u32 pt[2];

    u32 t1[2];
    u32 t2[2];

    ct[0] = ct_buf0[0];
    ct[1] = ct_buf0[1];

    // Decrypt with the third key, encrypt with the second, decrypt with
    // the first, then XOR the result with the IV.
    _des_crypt_decrypt (t1, ct, K4, K5, s_SPtrans);
    _des_crypt_encrypt (t2, t1, K2, K3, s_SPtrans);
    _des_crypt_decrypt (pt, t2, K0, K1, s_SPtrans);

    pt[0] ^= iv[0];
    pt[1] ^= iv[1];

    // password
    // (the two constants are "pass" and "word" as little-endian words;
    //  rejecting here skips the second block decryption for wrong keys)

    if (pt[0] != 0x73736170) continue;
    if (pt[1] != 0x64726f77) continue;

    // For the second block the previous ciphertext block takes the place
    // of the IV.
    iv[0] = ct_buf0[0];
    iv[1] = ct_buf0[1];

    ct[0] = ct_buf1[0];
    ct[1] = ct_buf1[1];

    _des_crypt_decrypt (t1, ct, K4, K5, s_SPtrans);
    _des_crypt_encrypt (t2, t1, K2, K3, s_SPtrans);
    _des_crypt_decrypt (pt, t2, K0, K1, s_SPtrans);

    pt[0] ^= iv[0];
    pt[1] ^= iv[1];

    // -check\x02\x02
    // (the two constants are "-che" and "ck\x02\x02" as little-endian words)

    if (pt[0] != 0x6568632d) continue;
    if (pt[1] != 0x02026b63) continue;

    // Both plaintext blocks matched: hand the four ciphertext words to the
    // comparison macro.
    // NOTE(review): COMPARE_S_SCALAR is defined elsewhere and presumably
    // relies on local names such as search, gid and il_pos - confirm before
    // renaming.
    const u32 r0 = ct_buf0[0];
    const u32 r1 = ct_buf0[1];
    const u32 r2 = ct_buf1[0];
    const u32 r3 = ct_buf1[1];

    COMPARE_S_SCALAR (r0, r1, r2, r3);
  }
}