/** * Author......: See docs/credits.txt * License.....: MIT */ //#define NEW_SIMD_CODE #ifdef KERNEL_STATIC #include M2S(INCLUDE_PATH/inc_vendor.h) #include M2S(INCLUDE_PATH/inc_types.h) #include M2S(INCLUDE_PATH/inc_platform.cl) #include M2S(INCLUDE_PATH/inc_common.cl) #include M2S(INCLUDE_PATH/inc_hash_sha1.cl) #include M2S(INCLUDE_PATH/inc_cipher_aes.cl) #include M2S(INCLUDE_PATH/inc_hash_sha512.cl) #endif typedef struct gpg { u32 cipher_algo; u32 iv[4]; u32 modulus_size; u32 encrypted_data[384]; u32 encrypted_data_size; } gpg_t; typedef struct gpg_tmp { // buffer for a maximum of 256 + 8 octets, we extend it to 384 octets so it's always 128 byte aligned (1024 bits) u32 salted_pw_block[96]; // actual number of bytes in 'salted_pwd' that are used since salt and password are copied multiple times into the buffer u32 salted_pw_block_len; u64 h[8]; u32 w0[4]; u32 w1[4]; u32 w2[4]; u32 w3[4]; u32 w4[4]; u32 w5[4]; u32 w6[4]; u32 w7[4]; int len; } gpg_tmp_t; DECLSPEC u32 hc_bytealign_le_S (const u32 a, const u32 b, const int c) { const int c_mod_4 = c & 3; #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC const u32 r = l32_from_64_S ((v64_from_v32ab_S (b, a) >> (c_mod_4 * 8))); #endif #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> (c_mod_4 * 4)) & 0xffff; #endif #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> (c_mod_4 * 8)); #endif const u32 r = hc_byte_perm (b, a, selector); #endif return r; } DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS const u32 *append, u32 len) { const u32 start_index = (offset - 1) >> 2; const u32 count = ((offset + len + 3) >> 2) - start_index; const int off_mod_4 = offset & 3; const int off_minus_4 = 4 - off_mod_4; block[start_index] |= hc_bytealign_le_S (append[0], 0, off_minus_4); for (u32 idx = 1; idx < count; idx++) { block[start_index + idx] = hc_bytealign_le_S (append[idx], append[idx - 1], off_minus_4); } } DECLSPEC void memzero_le_S (PRIVATE_AS u32 *block, const u32 start_offset, const u32 end_offset) { const u32 start_idx = start_offset / 4; // zero out bytes in the first u32 starting from 'start_offset' // math is a bit complex to avoid shifting by 32 bits, which is not possible on some architectures block[start_idx] &= ~(0xffffffff << ((start_offset & 3) * 8)); const u32 end_idx = (end_offset + 3) / 4; // zero out bytes in u32 units -- note that the last u32 is completely zeroed! for (u32 i = start_idx + 1; i < end_idx; i++) { block[i] = 0; } } DECLSPEC void memzero_be_S (PRIVATE_AS u32 *block, const u32 start_offset, const u32 end_offset) { const u32 start_idx = start_offset / 4; // zero out bytes in the first u32 starting from 'start_offset' // math is a bit complex to avoid shifting by 32 bits, which is not possible on some architectures block[start_idx] &= ~(0xffffffff >> ((start_offset & 3) * 8)); const u32 end_idx = (end_offset + 3) / 4; // zero out bytes in u32 units -- note that the last u32 is completely zeroed! for (u32 i = start_idx + 1; i < end_idx; i++) { block[i] = 0; } } DECLSPEC void aes128_decrypt_cfb (GLOBAL_AS const u32 *encrypted_data, int data_len, PRIVATE_AS const u32 *iv, PRIVATE_AS const u32 *key, PRIVATE_AS u32 *decrypted_data, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) { u32 ks[44]; u32 essiv[4]; // Copy the IV, since this will be modified essiv[0] = iv[0]; essiv[1] = iv[1]; essiv[2] = iv[2]; essiv[3] = iv[3]; aes128_set_encrypt_key (ks, key, s_te0, s_te1, s_te2, s_te3); // Decrypt an AES-128 encrypted block for (u32 i = 0; i < (data_len + 3) / 4; i += 4) { aes128_encrypt (ks, essiv, decrypted_data + i, s_te0, s_te1, s_te2, s_te3, s_te4); decrypted_data[i + 0] ^= encrypted_data[i + 0]; decrypted_data[i + 1] ^= encrypted_data[i + 1]; decrypted_data[i + 2] ^= encrypted_data[i + 2]; decrypted_data[i + 3] ^= encrypted_data[i + 3]; // Note: Not necessary if you are only decrypting a single block! essiv[0] = encrypted_data[i + 0]; essiv[1] = encrypted_data[i + 1]; essiv[2] = encrypted_data[i + 2]; essiv[3] = encrypted_data[i + 3]; } } DECLSPEC void aes256_decrypt_cfb (GLOBAL_AS const u32 *encrypted_data, int data_len, PRIVATE_AS const u32 *iv, PRIVATE_AS const u32 *key, PRIVATE_AS u32 *decrypted_data, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) { u32 ks[60]; u32 essiv[4]; // Copy the IV, since this will be modified essiv[0] = iv[0]; essiv[1] = iv[1]; essiv[2] = iv[2]; essiv[3] = iv[3]; aes256_set_encrypt_key (ks, key, s_te0, s_te1, s_te2, s_te3); // Decrypt an AES-256 encrypted block for (u32 i = 0; i < (data_len + 3) / 4; i += 4) { aes256_encrypt (ks, essiv, decrypted_data + i, s_te0, s_te1, s_te2, s_te3, s_te4); decrypted_data[i + 0] ^= encrypted_data[i + 0]; decrypted_data[i + 1] ^= encrypted_data[i + 1]; decrypted_data[i + 2] ^= encrypted_data[i + 2]; decrypted_data[i + 3] ^= encrypted_data[i + 3]; // Note: Not necessary if you are only decrypting a single block! essiv[0] = encrypted_data[i + 0]; essiv[1] = encrypted_data[i + 1]; essiv[2] = encrypted_data[i + 2]; essiv[3] = encrypted_data[i + 3]; } } DECLSPEC int check_decoded_data (PRIVATE_AS u32 *decoded_data, const u32 decoded_data_size) { // Check the SHA-1 of the decrypted data which is stored at the end of the decrypted data const u32 sha1_byte_off = (decoded_data_size - 20); const u32 sha1_u32_off = sha1_byte_off / 4; u32 expected_sha1[5]; expected_sha1[0] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off); expected_sha1[1] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off); expected_sha1[2] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off); expected_sha1[3] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off); expected_sha1[4] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off); memzero_le_S (decoded_data, sha1_byte_off, 384 * sizeof(u32)); sha1_ctx_t ctx; sha1_init (&ctx); sha1_update_swap (&ctx, decoded_data, sha1_byte_off); sha1_final (&ctx); return (expected_sha1[0] == hc_swap32_S (ctx.h[0])) && (expected_sha1[1] == hc_swap32_S (ctx.h[1])) && (expected_sha1[2] == hc_swap32_S (ctx.h[2])) && (expected_sha1[3] == hc_swap32_S (ctx.h[3])) && (expected_sha1[4] == hc_swap32_S (ctx.h[4])); } KERNEL_FQ void m17020_init (KERN_ATTR_TMPS_ESALT (gpg_tmp_t, gpg_t)) { const u64 gid = get_global_id (0); if (gid >= GID_CNT) return; const u32 pw_len = pws[gid].pw_len; const u32 salted_pw_len = (salt_bufs[SALT_POS_HOST].salt_len + pw_len); u32 salted_pw_block[96]; // concatenate salt and password -- the salt is always 8 bytes (2 * u32) salted_pw_block[0] = salt_bufs[SALT_POS_HOST].salt_buf[0]; salted_pw_block[1] = salt_bufs[SALT_POS_HOST].salt_buf[1]; for (u32 idx = 0; idx < 64; idx++) salted_pw_block[idx + 2] = pws[gid].i[idx]; // zero remainder of buffer, the buffer will now be 96 words (3072 bits) containing: // 0 - 1: salt // 2 - 65: zero-padded password (max pwd len: 64 words = 256 bytes) // 66 - 95: zeros for (u32 idx = 66; idx < 96; idx++) salted_pw_block[idx] = 0; // create a number of copies for efficiency const u32 copies = 96 * sizeof(u32) / salted_pw_len; for (u32 idx = 1; idx < copies; idx++) { memcat_le_S (salted_pw_block, idx * salted_pw_len, salted_pw_block, salted_pw_len); } for (u32 idx = 0; idx < 96; idx++) { tmps[gid].salted_pw_block[idx] = hc_swap32_S (salted_pw_block[idx]); } tmps[gid].salted_pw_block_len = (copies * salted_pw_len); tmps[gid].h[ 0] = SHA512M_A; tmps[gid].h[ 1] = SHA512M_B; tmps[gid].h[ 2] = SHA512M_C; tmps[gid].h[ 3] = SHA512M_D; tmps[gid].h[ 4] = SHA512M_E; tmps[gid].h[ 5] = SHA512M_F; tmps[gid].h[ 6] = SHA512M_G; tmps[gid].h[ 7] = SHA512M_H; tmps[gid].h[ 8] = SHA512M_A; tmps[gid].h[ 9] = SHA512M_B; tmps[gid].h[10] = SHA512M_C; tmps[gid].h[11] = SHA512M_D; tmps[gid].h[12] = SHA512M_E; tmps[gid].h[13] = SHA512M_F; tmps[gid].h[14] = SHA512M_G; tmps[gid].h[15] = SHA512M_H; tmps[gid].len = 0; } KERNEL_FQ void m17020_loop_prepare (KERN_ATTR_TMPS_ESALT (gpg_tmp_t, gpg_t)) { const u64 gid = get_global_id (0); if (gid >= GID_CNT) return; tmps[gid].w0[0] = 0; tmps[gid].w0[1] = 0; tmps[gid].w0[2] = 0; tmps[gid].w0[3] = 0; tmps[gid].w1[0] = 0; tmps[gid].w1[1] = 0; tmps[gid].w1[2] = 0; tmps[gid].w1[3] = 0; tmps[gid].w2[0] = 0; tmps[gid].w2[1] = 0; tmps[gid].w2[2] = 0; tmps[gid].w2[3] = 0; tmps[gid].w3[0] = 0; tmps[gid].w3[1] = 0; tmps[gid].w3[2] = 0; tmps[gid].w3[3] = 0; tmps[gid].w4[0] = 0; tmps[gid].w4[1] = 0; tmps[gid].w4[2] = 0; tmps[gid].w4[3] = 0; tmps[gid].w5[0] = 0; tmps[gid].w5[1] = 0; tmps[gid].w5[2] = 0; tmps[gid].w5[3] = 0; tmps[gid].w6[0] = 0; tmps[gid].w6[1] = 0; tmps[gid].w6[2] = 0; tmps[gid].w6[3] = 0; tmps[gid].w7[0] = 0; tmps[gid].w7[1] = 0; tmps[gid].w7[2] = 0; tmps[gid].w7[3] = 0; tmps[gid].len = 0; } KERNEL_FQ void m17020_loop (KERN_ATTR_TMPS_ESALT (gpg_tmp_t, gpg_t)) { const u64 gid = get_global_id (0); if (gid >= GID_CNT) return; // get the prepared buffer from the gpg_tmp_t struct into a local buffer u32 salted_pw_block[96]; for (int i = 0; i < 96; i++) salted_pw_block[i] = tmps[gid].salted_pw_block[i]; const u32 salted_pw_block_len = tmps[gid].salted_pw_block_len; // do we really need this, since the salt is always length 8? if (salted_pw_block_len == 0) return; /** * context load */ sha512_ctx_t ctx; for (int i = 0; i < 8; i++) ctx.h[i] = tmps[gid].h[i]; for (int i = 0; i < 4; i++) ctx.w0[i] = tmps[gid].w0[i]; for (int i = 0; i < 4; i++) ctx.w1[i] = tmps[gid].w1[i]; for (int i = 0; i < 4; i++) ctx.w2[i] = tmps[gid].w2[i]; for (int i = 0; i < 4; i++) ctx.w3[i] = tmps[gid].w3[i]; for (int i = 0; i < 4; i++) ctx.w4[i] = tmps[gid].w4[i]; for (int i = 0; i < 4; i++) ctx.w5[i] = tmps[gid].w5[i]; for (int i = 0; i < 4; i++) ctx.w6[i] = tmps[gid].w6[i]; for (int i = 0; i < 4; i++) ctx.w7[i] = tmps[gid].w7[i]; ctx.len = tmps[gid].len; // sha-512 of salt and password, up to 'salt_iter' bytes const u32 salt_iter = salt_bufs[SALT_POS_HOST].salt_iter; const u32 salted_pw_block_pos = LOOP_POS % salted_pw_block_len; const u32 rounds = (LOOP_CNT + salted_pw_block_pos) / salted_pw_block_len; for (u32 i = 0; i < rounds; i++) { sha512_update (&ctx, salted_pw_block, salted_pw_block_len); } if ((LOOP_POS + LOOP_CNT) == salt_iter) { const u32 remaining_bytes = salt_iter % salted_pw_block_len; if (remaining_bytes) { memzero_be_S (salted_pw_block, remaining_bytes, salted_pw_block_len); sha512_update (&ctx, salted_pw_block, remaining_bytes); } sha512_final (&ctx); } /** * context save */ for (int i = 0; i < 8; i++) tmps[gid].h[i] = ctx.h[i]; for (int i = 0; i < 4; i++) tmps[gid].w0[i] = ctx.w0[i]; for (int i = 0; i < 4; i++) tmps[gid].w1[i] = ctx.w1[i]; for (int i = 0; i < 4; i++) tmps[gid].w2[i] = ctx.w2[i]; for (int i = 0; i < 4; i++) tmps[gid].w3[i] = ctx.w3[i]; for (int i = 0; i < 4; i++) tmps[gid].w4[i] = ctx.w4[i]; for (int i = 0; i < 4; i++) tmps[gid].w5[i] = ctx.w5[i]; for (int i = 0; i < 4; i++) tmps[gid].w6[i] = ctx.w6[i]; for (int i = 0; i < 4; i++) tmps[gid].w7[i] = ctx.w7[i]; tmps[gid].len = ctx.len; } KERNEL_FQ void m17020_comp (KERN_ATTR_TMPS_ESALT (gpg_tmp_t, gpg_t)) { // not in use here, special case... } KERNEL_FQ void m17020_aux1 (KERN_ATTR_TMPS_ESALT (gpg_tmp_t, gpg_t)) { /** * modifier */ const u64 lid = get_local_id (0); const u64 gid = get_global_id (0); const u64 lsz = get_local_size (0); /** * aes shared */ #ifdef REAL_SHM LOCAL_VK u32 s_te0[256]; LOCAL_VK u32 s_te1[256]; LOCAL_VK u32 s_te2[256]; LOCAL_VK u32 s_te3[256]; LOCAL_VK u32 s_te4[256]; for (u32 i = lid; i < 256; i += lsz) { s_te0[i] = te0[i]; s_te1[i] = te1[i]; s_te2[i] = te2[i]; s_te3[i] = te3[i]; s_te4[i] = te4[i]; } SYNC_THREADS (); #else CONSTANT_AS u32a *s_te0 = te0; CONSTANT_AS u32a *s_te1 = te1; CONSTANT_AS u32a *s_te2 = te2; CONSTANT_AS u32a *s_te3 = te3; CONSTANT_AS u32a *s_te4 = te4; #endif if (gid >= GID_CNT) return; // retrieve and use the SHA-512 as the key for AES u32 aes_key[4]; aes_key[0] = hc_swap32_S (h32_from_64 (tmps[gid].h[0])); aes_key[1] = hc_swap32_S (l32_from_64 (tmps[gid].h[0])); aes_key[2] = hc_swap32_S (h32_from_64 (tmps[gid].h[1])); aes_key[3] = hc_swap32_S (l32_from_64 (tmps[gid].h[1])); u32 iv[4] = {0}; for (int idx = 0; idx < 4; idx++) iv[idx] = esalt_bufs[DIGESTS_OFFSET_HOST].iv[idx]; u32 decoded_data[384]; const u32 enc_data_size = esalt_bufs[DIGESTS_OFFSET_HOST].encrypted_data_size; aes128_decrypt_cfb (esalt_bufs[DIGESTS_OFFSET_HOST].encrypted_data, enc_data_size, iv, aes_key, decoded_data, s_te0, s_te1, s_te2, s_te3, s_te4); if (check_decoded_data (decoded_data, enc_data_size)) { if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET_HOST]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS_HOST, DIGESTS_CNT, 0, DIGESTS_OFFSET_HOST + 0, gid, 0, 0, 0); } } } KERNEL_FQ void m17020_aux2 (KERN_ATTR_TMPS_ESALT (gpg_tmp_t, gpg_t)) { /** * modifier */ const u64 lid = get_local_id (0); const u64 gid = get_global_id (0); const u64 lsz = get_local_size (0); /** * aes shared */ #ifdef REAL_SHM LOCAL_VK u32 s_te0[256]; LOCAL_VK u32 s_te1[256]; LOCAL_VK u32 s_te2[256]; LOCAL_VK u32 s_te3[256]; LOCAL_VK u32 s_te4[256]; for (u32 i = lid; i < 256; i += lsz) { s_te0[i] = te0[i]; s_te1[i] = te1[i]; s_te2[i] = te2[i]; s_te3[i] = te3[i]; s_te4[i] = te4[i]; } SYNC_THREADS (); #else CONSTANT_AS u32a *s_te0 = te0; CONSTANT_AS u32a *s_te1 = te1; CONSTANT_AS u32a *s_te2 = te2; CONSTANT_AS u32a *s_te3 = te3; CONSTANT_AS u32a *s_te4 = te4; #endif if (gid >= GID_CNT) return; // retrieve and use the SHA-512 as the key for AES u32 aes_key[8]; aes_key[0] = hc_swap32_S (h32_from_64 (tmps[gid].h[0])); aes_key[1] = hc_swap32_S (l32_from_64 (tmps[gid].h[0])); aes_key[2] = hc_swap32_S (h32_from_64 (tmps[gid].h[1])); aes_key[3] = hc_swap32_S (l32_from_64 (tmps[gid].h[1])); aes_key[4] = hc_swap32_S (h32_from_64 (tmps[gid].h[2])); aes_key[5] = hc_swap32_S (l32_from_64 (tmps[gid].h[2])); aes_key[6] = hc_swap32_S (h32_from_64 (tmps[gid].h[3])); aes_key[7] = hc_swap32_S (l32_from_64 (tmps[gid].h[3])); u32 iv[4] = {0}; for (int idx = 0; idx < 4; idx++) iv[idx] = esalt_bufs[DIGESTS_OFFSET_HOST].iv[idx]; u32 decoded_data[384]; const u32 enc_data_size = esalt_bufs[DIGESTS_OFFSET_HOST].encrypted_data_size; aes256_decrypt_cfb (esalt_bufs[DIGESTS_OFFSET_HOST].encrypted_data, enc_data_size, iv, aes_key, decoded_data, s_te0, s_te1, s_te2, s_te3, s_te4); if (check_decoded_data (decoded_data, enc_data_size)) { if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET_HOST]) == 0) { mark_hash (plains_buf, d_return_buf, SALT_POS_HOST, DIGESTS_CNT, 0, DIGESTS_OFFSET_HOST + 0, gid, 0, 0, 0); } } }