/** * Author......: See docs/credits.txt * License.....: MIT */ #define NEW_SIMD_CODE #ifdef KERNEL_STATIC #include M2S(INCLUDE_PATH/inc_vendor.h) #include M2S(INCLUDE_PATH/inc_types.h) #include M2S(INCLUDE_PATH/inc_platform.cl) #include M2S(INCLUDE_PATH/inc_common.cl) #include M2S(INCLUDE_PATH/inc_simd.cl) #include M2S(INCLUDE_PATH/inc_hash_md5.cl) #include M2S(INCLUDE_PATH/inc_cipher_aes.cl) #endif #define COMPARE_S M2S(INCLUDE_PATH/inc_comp_single.cl) #define COMPARE_M M2S(INCLUDE_PATH/inc_comp_multi.cl) typedef struct encdatavault { u32 keychain[32]; u32 iv[2]; u32 ct[2]; u32 algo; u32 version; u32 nb_keys; u32 key_len; } encdatavault_t; typedef struct encdatavault_tmp { u32 tmp_buf[4]; u32 out_buf[4]; } encdatavault_tmp_t; CONSTANT_VK u32a default_salts[32] = { 0x0fc9e7d0, 0x8be424f6, 0x569d4e72, 0xedbc2c5c, 0xdd7974f3, 0x3d8300c2, 0x9bd293d5, 0x7f9d9b8c, 0x60850c47, 0x5846e296, 0x2d995d5e, 0xf1d06a28, 0xe23f3d6b, 0x99614ba9, 0xc4edc5dd, 0xd8253ce1, 0x2ca45989, 0x1d7852db, 0x3031d09f, 0x9f348835, 0xdb1bb527, 0xe8214f79, 0xa0b2cb32, 0x42d9f20a, 0xaea8b68e, 0xd07b62a1, 0x400e17c6, 0xad6420c8, 0xeae3f44e, 0xaf4a8f84, 0xf1fab308, 0x8569bef8 }; KERNEL_FQ void m29940_init (KERN_ATTR_TMPS_ESALT (encdatavault_tmp_t, encdatavault_t)) { /** * base */ const u64 gid = get_global_id (0); if (gid >= GID_CNT) return; md5_ctx_t md5_ctx; md5_init (&md5_ctx); md5_update_global (&md5_ctx, pws[gid].i, pws[gid].pw_len); md5_final (&md5_ctx); tmps[gid].tmp_buf[0] = md5_ctx.h[0]; tmps[gid].tmp_buf[1] = md5_ctx.h[1]; tmps[gid].tmp_buf[2] = md5_ctx.h[2]; tmps[gid].tmp_buf[3] = md5_ctx.h[3]; tmps[gid].out_buf[0] = 0; tmps[gid].out_buf[1] = 0; tmps[gid].out_buf[2] = 0; tmps[gid].out_buf[3] = 0; } KERNEL_FQ void m29940_loop (KERN_ATTR_TMPS_ESALT (encdatavault_tmp_t, encdatavault_t)) { const u64 gid = get_global_id (0); if ((gid * VECT_SIZE) >= GID_CNT) return; u32x digest[4]; digest[0] = packv (tmps, tmp_buf, gid, 0); digest[1] = packv (tmps, tmp_buf, gid, 1); digest[2] = packv (tmps, tmp_buf, gid, 2); digest[3] = packv (tmps, tmp_buf, gid, 3); u32x out[4]; out[0] = packv (tmps, out_buf, gid, 0); out[1] = packv (tmps, out_buf, gid, 1); out[2] = packv (tmps, out_buf, gid, 2); out[3] = packv (tmps, out_buf, gid, 3); u32x block0[4]; u32x block1[4]; u32x block2[4]; u32x block3[4]; block0[0] = 0; block0[1] = 0; block0[2] = 0; block0[3] = 0; block1[0] = 0x80; block1[1] = 0; block1[2] = 0; block1[3] = 0; block2[0] = 0; block2[1] = 0; block2[2] = 0; block2[3] = 0; block3[0] = 0; block3[1] = 0; block3[2] = 16 * 8; block3[3] = 0; for (u32 j = 0; j < LOOP_CNT; j++) { block0[0] = digest[0]; block0[1] = digest[1]; block0[2] = digest[2]; block0[3] = digest[3]; digest[0] = MD5M_A; digest[1] = MD5M_B; digest[2] = MD5M_C; digest[3] = MD5M_D; md5_transform_vector (block0, block1, block2, block3, digest); out[0] ^= digest[0]; out[1] ^= digest[1]; out[2] ^= digest[2]; out[3] ^= digest[3]; } unpackv (tmps, tmp_buf, gid, 0, digest[0]); unpackv (tmps, tmp_buf, gid, 1, digest[1]); unpackv (tmps, tmp_buf, gid, 2, digest[2]); unpackv (tmps, tmp_buf, gid, 3, digest[3]); unpackv (tmps, out_buf, gid, 0, out[0]); unpackv (tmps, out_buf, gid, 1, out[1]); unpackv (tmps, out_buf, gid, 2, out[2]); unpackv (tmps, out_buf, gid, 3, out[3]); } KERNEL_FQ void m29940_comp (KERN_ATTR_TMPS_ESALT (encdatavault_tmp_t, encdatavault_t)) { const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); const u64 lsz = get_local_size (0); /** * aes shared */ #ifdef REAL_SHM LOCAL_VK u32 s_td0[256]; LOCAL_VK u32 s_td1[256]; LOCAL_VK u32 s_td2[256]; LOCAL_VK u32 s_td3[256]; LOCAL_VK u32 s_td4[256]; LOCAL_VK u32 s_te0[256]; LOCAL_VK u32 s_te1[256]; LOCAL_VK u32 s_te2[256]; LOCAL_VK u32 s_te3[256]; LOCAL_VK u32 s_te4[256]; for (u32 i = lid; i < 256; i += lsz) { s_td0[i] = td0[i]; s_td1[i] = td1[i]; s_td2[i] = td2[i]; s_td3[i] = td3[i]; s_td4[i] = td4[i]; s_te0[i] = te0[i]; s_te1[i] = te1[i]; s_te2[i] = te2[i]; s_te3[i] = te3[i]; s_te4[i] = te4[i]; } SYNC_THREADS (); #else CONSTANT_AS u32a *s_td0 = td0; CONSTANT_AS u32a *s_td1 = td1; CONSTANT_AS u32a *s_td2 = td2; CONSTANT_AS u32a *s_td3 = td3; CONSTANT_AS u32a *s_td4 = td4; CONSTANT_AS u32a *s_te0 = te0; CONSTANT_AS u32a *s_te1 = te1; CONSTANT_AS u32a *s_te2 = te2; CONSTANT_AS u32a *s_te3 = te3; CONSTANT_AS u32a *s_te4 = te4; #endif if (gid >= GID_CNT) return; // decrypt keychain using PBKDF2 key #define ENC_MAX_KEY_NUM 8 u32 keysalt[ENC_MAX_KEY_NUM][4]; for (int i = 0, j = 0; i < ENC_MAX_KEY_NUM; i += 1, j += 4) { keysalt[i][0] = hc_swap32_S (tmps[gid].out_buf[0]) ^ default_salts[j + 0]; keysalt[i][1] = hc_swap32_S (tmps[gid].out_buf[1]) ^ default_salts[j + 1]; keysalt[i][2] = hc_swap32_S (tmps[gid].out_buf[2]) ^ default_salts[j + 2]; keysalt[i][3] = hc_swap32_S (tmps[gid].out_buf[3]) ^ default_salts[j + 3]; } u32 ukey[4]; ukey[0] = keysalt[0][0]; ukey[1] = keysalt[0][1]; ukey[2] = keysalt[0][2]; ukey[3] = keysalt[0][3]; u32 ks[44]; AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); const u32 key_len = esalt_bufs[DIGESTS_OFFSET_HOST].key_len; #define ENC_MAX_KEY_NUM 8 u32 ivs_keychain[ENC_MAX_KEY_NUM][2]; ivs_keychain[0][0] = 0; ivs_keychain[0][1] = 0; for (int i = 1, j = 7; i < ENC_MAX_KEY_NUM; i += 1, j -= 1) // +4 is not a bug, 8/16 bytes are just discarded { ivs_keychain[i][0] = keysalt[j][0]; ivs_keychain[i][1] = keysalt[j][1]; } u32 ctr_keychain[ENC_MAX_KEY_NUM][4]; #define ENC_KEYCHAIN_SIZE 128 #define ENC_BLOCK_SIZE 16 for (int i = 0, counter = 0; i < (ENC_KEYCHAIN_SIZE / ENC_BLOCK_SIZE); i++, counter++) { u32 in[4]; in[0] = ivs_keychain[0][0]; in[1] = ivs_keychain[0][1]; in[2] = 0; in[3] = counter; u32 out[4]; AES128_encrypt (ks, in, out, s_te0, s_te1, s_te2, s_te3, s_te4); ctr_keychain[i][0] = out[0]; ctr_keychain[i][1] = out[1]; ctr_keychain[i][2] = out[2]; ctr_keychain[i][3] = out[3]; for (int j = 1; j < ENC_MAX_KEY_NUM; j++) { in[0] = ivs_keychain[j][0]; in[1] = ivs_keychain[j][1]; in[2] = 0; in[3] = counter; AES128_encrypt (ks, in, out, s_te0, s_te1, s_te2, s_te3, s_te4); ctr_keychain[i][0] ^= out[0]; ctr_keychain[i][1] ^= out[1]; ctr_keychain[i][2] ^= out[2]; ctr_keychain[i][3] ^= out[3]; } } u32 keychain[ENC_MAX_KEY_NUM][4]; for (int i = 0, j = 0; i < (ENC_KEYCHAIN_SIZE / ENC_BLOCK_SIZE); i += 1, j += 4) { keychain[i][0] = ctr_keychain[i][0] ^ esalt_bufs[DIGESTS_OFFSET_HOST].keychain[j + 0]; keychain[i][1] = ctr_keychain[i][1] ^ esalt_bufs[DIGESTS_OFFSET_HOST].keychain[j + 1]; keychain[i][2] = ctr_keychain[i][2] ^ esalt_bufs[DIGESTS_OFFSET_HOST].keychain[j + 2]; keychain[i][3] = ctr_keychain[i][3] ^ esalt_bufs[DIGESTS_OFFSET_HOST].keychain[j + 3]; } // decrypt encrypted data using keychain key ukey[0] = keychain[0][0]; ukey[1] = keychain[0][1]; ukey[2] = keychain[0][2]; ukey[3] = keychain[0][3]; AES128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3); u32 ivs[ENC_MAX_KEY_NUM][2]; ivs[0][0] = esalt_bufs[DIGESTS_OFFSET_HOST].iv[0]; ivs[0][1] = esalt_bufs[DIGESTS_OFFSET_HOST].iv[1]; for (int i = 1; i < esalt_bufs[DIGESTS_OFFSET_HOST].nb_keys; i += 1) // +4 is not a bug, 8/16 bytes are just discarded { ivs[i][0] = esalt_bufs[DIGESTS_OFFSET_HOST].iv[0] ^ keychain[i][0]; ivs[i][1] = esalt_bufs[DIGESTS_OFFSET_HOST].iv[1] ^ keychain[i][1]; } #define CTR_LEN 16 u32 ctr[ENC_MAX_KEY_NUM][4]; for (int i = 0, counter = 1; i < (CTR_LEN / ENC_BLOCK_SIZE); i++, counter++) // is always just 1 iteration here, but concept is needed for later kernels { u32 in[4]; in[0] = ivs[0][0]; in[1] = ivs[0][1]; in[2] = 0; in[3] = counter; u32 out[4]; AES128_encrypt (ks, in, out, s_te0, s_te1, s_te2, s_te3, s_te4); ctr[i][0] = out[0]; ctr[i][1] = out[1]; ctr[i][2] = out[2]; ctr[i][3] = out[3]; for (int j = 1; j < esalt_bufs[DIGESTS_OFFSET_HOST].nb_keys; j++) { in[0] = ivs[j][0]; in[1] = ivs[j][1]; in[2] = 0; in[3] = counter; AES128_encrypt (ks, in, out, s_te0, s_te1, s_te2, s_te3, s_te4); ctr[i][0] ^= out[0]; ctr[i][1] ^= out[1]; ctr[i][2] ^= out[2]; ctr[i][3] ^= out[3]; } } u32 ct[2]; ct[0] = esalt_bufs[DIGESTS_OFFSET_HOST].ct[0]; ct[1] = esalt_bufs[DIGESTS_OFFSET_HOST].ct[1]; u32 pt[2]; pt[0] = ct[0] ^ ctr[0][1]; pt[1] = ct[1] ^ ctr[0][2]; if ((pt[0] == 0xd2c3b4a1) && ((pt[1] & 0x00ffffff) == 0)) { if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET_HOST]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS_HOST, DIGESTS_CNT, 0, DIGESTS_OFFSET_HOST + 0, gid, 0, 0, 0); } } }