From caa34924bf8150cfd0b0f1ec5a5913c72de86fff Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Sat, 22 Feb 2020 10:18:09 +0100 Subject: [PATCH] More optimizations in -m 15300 and -m 15900 --- OpenCL/m15300-pure.cl | 192 +++++++++++++++++++++++++++++------------- OpenCL/m15900-pure.cl | 169 ++++++++++++++++++++++++++----------- 2 files changed, 253 insertions(+), 108 deletions(-) diff --git a/OpenCL/m15300-pure.cl b/OpenCL/m15300-pure.cl index 57e920a11..f49c5c410 100644 --- a/OpenCL/m15300-pure.cl +++ b/OpenCL/m15300-pure.cl @@ -442,8 +442,6 @@ KERNEL_FQ void m15300_comp (KERN_ATTR_TMPS_ESALT (dpapimk_tmp_v1_t, dpapimk_t)) iv[0] = hc_swap32_S (tmps[gid].out[6]); iv[1] = hc_swap32_S (tmps[gid].out[7]); - u32 decrypted[26]; - /* Construct 3DES keys */ const u32 a = (key[0]); @@ -470,45 +468,123 @@ KERNEL_FQ void m15300_comp (KERN_ATTR_TMPS_ESALT (dpapimk_tmp_v1_t, dpapimk_t)) _des_crypt_keysetup (e, f, Ke, Kf, s_skb); - u32 contents_pos; - u32 contents_off; - u32 wx_off; + u32 p1[2]; + u32 p2[2]; + u32 out[2]; - for (wx_off = 0, contents_pos = 0, contents_off = 0; contents_pos < esalt_bufs[digests_offset].contents_len; wx_off += 2, contents_pos += 8, contents_off += 2) + u32 hmac_data[4]; + + hmac_data[0] = hc_swap32_S (esalt_bufs[digests_offset].contents[0]); + hmac_data[1] = hc_swap32_S (esalt_bufs[digests_offset].contents[1]); + hmac_data[2] = hc_swap32_S (esalt_bufs[digests_offset].contents[2]); + hmac_data[3] = hc_swap32_S (esalt_bufs[digests_offset].contents[3]); + + u32 expected_key[4]; + + expected_key[0] = hc_swap32_S (esalt_bufs[digests_offset].contents[4]); + expected_key[1] = hc_swap32_S (esalt_bufs[digests_offset].contents[5]); + expected_key[2] = hc_swap32_S (esalt_bufs[digests_offset].contents[6]); + expected_key[3] = hc_swap32_S (esalt_bufs[digests_offset].contents[7]); + + u32 last_iv[2]; + + last_iv[0] = hc_swap32_S (esalt_bufs[digests_offset].contents[8]); + last_iv[1] = hc_swap32_S (esalt_bufs[digests_offset].contents[9]); + + u32 last_key[16]; + + last_key[ 0] = hc_swap32_S (esalt_bufs[digests_offset].contents[10]); + last_key[ 1] = hc_swap32_S (esalt_bufs[digests_offset].contents[11]); + last_key[ 2] = hc_swap32_S (esalt_bufs[digests_offset].contents[12]); + last_key[ 3] = hc_swap32_S (esalt_bufs[digests_offset].contents[13]); + last_key[ 4] = hc_swap32_S (esalt_bufs[digests_offset].contents[14]); + last_key[ 5] = hc_swap32_S (esalt_bufs[digests_offset].contents[15]); + last_key[ 6] = hc_swap32_S (esalt_bufs[digests_offset].contents[16]); + last_key[ 7] = hc_swap32_S (esalt_bufs[digests_offset].contents[17]); + last_key[ 8] = hc_swap32_S (esalt_bufs[digests_offset].contents[18]); + last_key[ 9] = hc_swap32_S (esalt_bufs[digests_offset].contents[19]); + last_key[10] = hc_swap32_S (esalt_bufs[digests_offset].contents[20]); + last_key[11] = hc_swap32_S (esalt_bufs[digests_offset].contents[21]); + last_key[12] = hc_swap32_S (esalt_bufs[digests_offset].contents[22]); + last_key[13] = hc_swap32_S (esalt_bufs[digests_offset].contents[23]); + last_key[14] = hc_swap32_S (esalt_bufs[digests_offset].contents[24]); + last_key[15] = hc_swap32_S (esalt_bufs[digests_offset].contents[25]); + + // hmac_data + + _des_crypt_decrypt (p1, hmac_data + 0, Ke, Kf, s_SPtrans); + _des_crypt_encrypt (p2, p1, Kc, Kd, s_SPtrans); + _des_crypt_decrypt (out, p2, Ka, Kb, s_SPtrans); + + out[0] ^= iv[0]; + out[1] ^= iv[1]; + + iv[0] = hmac_data[0]; + iv[1] = hmac_data[1]; + + hmac_data[0] = out[0]; + hmac_data[1] = out[1]; + + _des_crypt_decrypt (p1, hmac_data + 2, Ke, Kf, s_SPtrans); + _des_crypt_encrypt (p2, p1, Kc, Kd, s_SPtrans); + _des_crypt_decrypt (out, p2, Ka, Kb, s_SPtrans); + + out[0] ^= iv[0]; + out[1] ^= iv[1]; + + iv[0] = hmac_data[2]; + iv[1] = hmac_data[3]; + + hmac_data[2] = out[0]; + hmac_data[3] = out[1]; + + // expected_key + + _des_crypt_decrypt (p1, expected_key + 0, Ke, Kf, s_SPtrans); + _des_crypt_encrypt (p2, p1, Kc, Kd, s_SPtrans); + _des_crypt_decrypt (out, p2, Ka, Kb, s_SPtrans); + + out[0] ^= iv[0]; + out[1] ^= iv[1]; + + iv[0] = expected_key[0]; + iv[1] = expected_key[1]; + + expected_key[0] = out[0]; + expected_key[1] = out[1]; + + _des_crypt_decrypt (p1, expected_key + 2, Ke, Kf, s_SPtrans); + _des_crypt_encrypt (p2, p1, Kc, Kd, s_SPtrans); + _des_crypt_decrypt (out, p2, Ka, Kb, s_SPtrans); + + out[0] ^= iv[0]; + out[1] ^= iv[1]; + + iv[0] = expected_key[2]; + iv[1] = expected_key[3]; + + expected_key[2] = out[0]; + expected_key[3] = out[1]; + + // last_key + + iv[0] = last_iv[0]; + iv[1] = last_iv[1]; + + for (int off = 0; off < 16; off += 2) { - /* First Pass */ - - u32 data[2]; - - data[0] = hc_swap32_S (esalt_bufs[digests_offset].contents[contents_off + 0]); - data[1] = hc_swap32_S (esalt_bufs[digests_offset].contents[contents_off + 1]); - - u32 p1[2]; - - _des_crypt_decrypt (p1, data, Ke, Kf, s_SPtrans); - - /* Second Pass */ - - u32 p2[2]; - - _des_crypt_encrypt (p2, p1, Kc, Kd, s_SPtrans); - - /* Third Pass */ - - u32 out[2]; - - _des_crypt_decrypt (out, p2, Ka, Kb, s_SPtrans); + _des_crypt_decrypt (p1, last_key + off, Ke, Kf, s_SPtrans); + _des_crypt_encrypt (p2, p1, Kc, Kd, s_SPtrans); + _des_crypt_decrypt (out, p2, Ka, Kb, s_SPtrans); out[0] ^= iv[0]; out[1] ^= iv[1]; - decrypted[wx_off + 0] = out[0]; - decrypted[wx_off + 1] = out[1]; + iv[0] = last_key[off + 0]; + iv[1] = last_key[off + 1]; - iv[0] = data[0]; - iv[1] = data[1]; - - if (wx_off == 24) break; + last_key[off + 0] = out[0]; + last_key[off + 1] = out[1]; } w0[0] = tmps[gid].userKey[0]; @@ -532,10 +608,10 @@ KERNEL_FQ void m15300_comp (KERN_ATTR_TMPS_ESALT (dpapimk_tmp_v1_t, dpapimk_t)) sha1_hmac_init_64 (&ctx, w0, w1, w2, w3); - w0[0] = hc_swap32_S (decrypted[0]); - w0[1] = hc_swap32_S (decrypted[1]); - w0[2] = hc_swap32_S (decrypted[2]); - w0[3] = hc_swap32_S (decrypted[3]); + w0[0] = hc_swap32_S (hmac_data[0]); + w0[1] = hc_swap32_S (hmac_data[1]); + w0[2] = hc_swap32_S (hmac_data[2]); + w0[3] = hc_swap32_S (hmac_data[3]); w1[0] = 0; w1[1] = 0; w1[2] = 0; @@ -572,22 +648,22 @@ KERNEL_FQ void m15300_comp (KERN_ATTR_TMPS_ESALT (dpapimk_tmp_v1_t, dpapimk_t)) sha1_hmac_init_64 (&ctx, w0, w1, w2, w3); - w0[0] = hc_swap32_S (decrypted[10]); - w0[1] = hc_swap32_S (decrypted[11]); - w0[2] = hc_swap32_S (decrypted[12]); - w0[3] = hc_swap32_S (decrypted[13]); - w1[0] = hc_swap32_S (decrypted[14]); - w1[1] = hc_swap32_S (decrypted[15]); - w1[2] = hc_swap32_S (decrypted[16]); - w1[3] = hc_swap32_S (decrypted[17]); - w2[0] = hc_swap32_S (decrypted[18]); - w2[1] = hc_swap32_S (decrypted[19]); - w2[2] = hc_swap32_S (decrypted[20]); - w2[3] = hc_swap32_S (decrypted[21]); - w3[0] = hc_swap32_S (decrypted[22]); - w3[1] = hc_swap32_S (decrypted[23]); - w3[2] = hc_swap32_S (decrypted[24]); - w3[3] = hc_swap32_S (decrypted[25]); + w0[0] = hc_swap32_S (last_key[ 0]); + w0[1] = hc_swap32_S (last_key[ 1]); + w0[2] = hc_swap32_S (last_key[ 2]); + w0[3] = hc_swap32_S (last_key[ 3]); + w1[0] = hc_swap32_S (last_key[ 4]); + w1[1] = hc_swap32_S (last_key[ 5]); + w1[2] = hc_swap32_S (last_key[ 6]); + w1[3] = hc_swap32_S (last_key[ 7]); + w2[0] = hc_swap32_S (last_key[ 8]); + w2[1] = hc_swap32_S (last_key[ 9]); + w2[2] = hc_swap32_S (last_key[10]); + w2[3] = hc_swap32_S (last_key[11]); + w3[0] = hc_swap32_S (last_key[12]); + w3[1] = hc_swap32_S (last_key[13]); + w3[2] = hc_swap32_S (last_key[14]); + w3[3] = hc_swap32_S (last_key[15]); sha1_hmac_update_64 (&ctx, w0, w1, w2, w3, 64); @@ -595,10 +671,10 @@ KERNEL_FQ void m15300_comp (KERN_ATTR_TMPS_ESALT (dpapimk_tmp_v1_t, dpapimk_t)) #define il_pos 0 - if ((decrypted[4] == hc_swap32_S (ctx.opad.h[0])) - && (decrypted[5] == hc_swap32_S (ctx.opad.h[1])) - && (decrypted[6] == hc_swap32_S (ctx.opad.h[2])) - && (decrypted[7] == hc_swap32_S (ctx.opad.h[3]))) + if ((expected_key[0] == hc_swap32_S (ctx.opad.h[0])) + && (expected_key[1] == hc_swap32_S (ctx.opad.h[1])) + && (expected_key[2] == hc_swap32_S (ctx.opad.h[2])) + && (expected_key[3] == hc_swap32_S (ctx.opad.h[3]))) { if (atomic_inc (&hashes_shown[digests_offset]) == 0) { diff --git a/OpenCL/m15900-pure.cl b/OpenCL/m15900-pure.cl index 4ab4f7bd4..82a47cee5 100644 --- a/OpenCL/m15900-pure.cl +++ b/OpenCL/m15900-pure.cl @@ -599,42 +599,111 @@ KERNEL_FQ void m15900_comp (KERN_ATTR_TMPS_ESALT (dpapimk_tmp_v2_t, dpapimk_t)) AES256_set_decrypt_key (ks, key, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); - /* 144 bytes */ - u32 decrypted[36] = { 0 }; + u32 out[4]; - u32 contents_pos; - u32 contents_off; - u32 wx_off; + u32 hmac_data[4]; - for (wx_off = 0, contents_pos = 0, contents_off = 0; contents_pos < esalt_bufs[digests_offset].contents_len; wx_off += 4, contents_pos += 16, contents_off += 4) + hmac_data[0] = esalt_bufs[digests_offset].contents[0]; + hmac_data[1] = esalt_bufs[digests_offset].contents[1]; + hmac_data[2] = esalt_bufs[digests_offset].contents[2]; + hmac_data[3] = esalt_bufs[digests_offset].contents[3]; + + u32 expected_key[4]; + + expected_key[0] = esalt_bufs[digests_offset].contents[4]; + expected_key[1] = esalt_bufs[digests_offset].contents[5]; + expected_key[2] = esalt_bufs[digests_offset].contents[6]; + expected_key[3] = esalt_bufs[digests_offset].contents[7]; + + u32 last_iv[4]; + + last_iv[0] = esalt_bufs[digests_offset].contents[16]; + last_iv[1] = esalt_bufs[digests_offset].contents[17]; + last_iv[2] = esalt_bufs[digests_offset].contents[18]; + last_iv[3] = esalt_bufs[digests_offset].contents[19]; + + u32 last_key[16]; + + last_key[ 0] = esalt_bufs[digests_offset].contents[20]; + last_key[ 1] = esalt_bufs[digests_offset].contents[21]; + last_key[ 2] = esalt_bufs[digests_offset].contents[22]; + last_key[ 3] = esalt_bufs[digests_offset].contents[23]; + last_key[ 4] = esalt_bufs[digests_offset].contents[24]; + last_key[ 5] = esalt_bufs[digests_offset].contents[25]; + last_key[ 6] = esalt_bufs[digests_offset].contents[26]; + last_key[ 7] = esalt_bufs[digests_offset].contents[27]; + last_key[ 8] = esalt_bufs[digests_offset].contents[28]; + last_key[ 9] = esalt_bufs[digests_offset].contents[29]; + last_key[10] = esalt_bufs[digests_offset].contents[30]; + last_key[11] = esalt_bufs[digests_offset].contents[31]; + last_key[12] = esalt_bufs[digests_offset].contents[32]; + last_key[13] = esalt_bufs[digests_offset].contents[33]; + last_key[14] = esalt_bufs[digests_offset].contents[34]; + last_key[15] = esalt_bufs[digests_offset].contents[35]; + + // hmac_data + + AES256_decrypt (ks, hmac_data, out, s_td0, s_td1, s_td2, s_td3, s_td4); + + out[0] ^= iv[0]; + out[1] ^= iv[1]; + out[2] ^= iv[2]; + out[3] ^= iv[3]; + + iv[0] = hmac_data[0]; + iv[1] = hmac_data[1]; + iv[2] = hmac_data[2]; + iv[3] = hmac_data[3]; + + hmac_data[0] = out[0]; + hmac_data[1] = out[1]; + hmac_data[2] = out[2]; + hmac_data[3] = out[3]; + + // expected_key + + AES256_decrypt (ks, expected_key, out, s_td0, s_td1, s_td2, s_td3, s_td4); + + out[0] ^= iv[0]; + out[1] ^= iv[1]; + out[2] ^= iv[2]; + out[3] ^= iv[3]; + + iv[0] = expected_key[0]; + iv[1] = expected_key[1]; + iv[2] = expected_key[2]; + iv[3] = expected_key[3]; + + expected_key[0] = out[0]; + expected_key[1] = out[1]; + expected_key[2] = out[2]; + expected_key[3] = out[3]; + + // last_key + + iv[0] = last_iv[0]; + iv[1] = last_iv[1]; + iv[2] = last_iv[2]; + iv[3] = last_iv[3]; + + for (int off = 0; off < 16; off += 4) { - u32 data[4]; - - data[0] = esalt_bufs[digests_offset].contents[contents_off + 0]; - data[1] = esalt_bufs[digests_offset].contents[contents_off + 1]; - data[2] = esalt_bufs[digests_offset].contents[contents_off + 2]; - data[3] = esalt_bufs[digests_offset].contents[contents_off + 3]; - - u32 out[4]; - - AES256_decrypt (ks, data, out, s_td0, s_td1, s_td2, s_td3, s_td4); + AES256_decrypt (ks, last_key + off, out, s_td0, s_td1, s_td2, s_td3, s_td4); out[0] ^= iv[0]; out[1] ^= iv[1]; out[2] ^= iv[2]; out[3] ^= iv[3]; - decrypted[wx_off + 0] = out[0]; - decrypted[wx_off + 1] = out[1]; - decrypted[wx_off + 2] = out[2]; - decrypted[wx_off + 3] = out[3]; + iv[0] = last_key[off + 0]; + iv[1] = last_key[off + 1]; + iv[2] = last_key[off + 2]; + iv[3] = last_key[off + 3]; - iv[0] = data[0]; - iv[1] = data[1]; - iv[2] = data[2]; - iv[3] = data[3]; - - if (contents_off == 32) break; + last_key[off + 0] = out[0]; + last_key[off + 1] = out[1]; + last_key[off + 2] = out[2]; + last_key[off + 3] = out[3]; } w0[0] = tmps[gid].userKey[0]; @@ -674,10 +743,10 @@ KERNEL_FQ void m15900_comp (KERN_ATTR_TMPS_ESALT (dpapimk_tmp_v2_t, dpapimk_t)) sha512_hmac_init_128 (&ctx, w0, w1, w2, w3, w4, w5, w6, w7); - w0[0] = decrypted[0]; - w0[1] = decrypted[1]; - w0[2] = decrypted[2]; - w0[3] = decrypted[3]; + w0[0] = hmac_data[0]; + w0[1] = hmac_data[1]; + w0[2] = hmac_data[2]; + w0[3] = hmac_data[3]; w1[0] = 0; w1[1] = 0; w1[2] = 0; @@ -746,22 +815,22 @@ KERNEL_FQ void m15900_comp (KERN_ATTR_TMPS_ESALT (dpapimk_tmp_v2_t, dpapimk_t)) sha512_hmac_init_128 (&ctx, w0, w1, w2, w3, w4, w5, w6, w7); - w0[0] = decrypted[20]; - w0[1] = decrypted[21]; - w0[2] = decrypted[22]; - w0[3] = decrypted[23]; - w1[0] = decrypted[24]; - w1[1] = decrypted[25]; - w1[2] = decrypted[26]; - w1[3] = decrypted[27]; - w2[0] = decrypted[28]; - w2[1] = decrypted[29]; - w2[2] = decrypted[30]; - w2[3] = decrypted[31]; - w3[0] = decrypted[32]; - w3[1] = decrypted[33]; - w3[2] = decrypted[34]; - w3[3] = decrypted[35]; + w0[0] = last_key[ 0]; + w0[1] = last_key[ 1]; + w0[2] = last_key[ 2]; + w0[3] = last_key[ 3]; + w1[0] = last_key[ 4]; + w1[1] = last_key[ 5]; + w1[2] = last_key[ 6]; + w1[3] = last_key[ 7]; + w2[0] = last_key[ 8]; + w2[1] = last_key[ 9]; + w2[2] = last_key[10]; + w2[3] = last_key[11]; + w3[0] = last_key[12]; + w3[1] = last_key[13]; + w3[2] = last_key[14]; + w3[3] = last_key[15]; w4[0] = 0; w4[1] = 0; w4[2] = 0; @@ -785,10 +854,10 @@ KERNEL_FQ void m15900_comp (KERN_ATTR_TMPS_ESALT (dpapimk_tmp_v2_t, dpapimk_t)) #define il_pos 0 - if ((decrypted[4] == h32_from_64_S (ctx.opad.h[0])) - && (decrypted[5] == l32_from_64_S (ctx.opad.h[0])) - && (decrypted[6] == h32_from_64_S (ctx.opad.h[1])) - && (decrypted[7] == l32_from_64_S (ctx.opad.h[1]))) + if ((expected_key[0] == h32_from_64_S (ctx.opad.h[0])) + && (expected_key[1] == l32_from_64_S (ctx.opad.h[0])) + && (expected_key[2] == h32_from_64_S (ctx.opad.h[1])) + && (expected_key[3] == l32_from_64_S (ctx.opad.h[1]))) { if (atomic_inc (&hashes_shown[digests_offset]) == 0) {