diff --git a/OpenCL/m02500-pure.cl b/OpenCL/m02500-pure.cl index ed74c9c56..cc2d2af72 100644 --- a/OpenCL/m02500-pure.cl +++ b/OpenCL/m02500-pure.cl @@ -138,66 +138,102 @@ KERNEL_FQ void m02500_init (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_eapol_t) if (gid >= gid_max) return; - sha1_hmac_ctx_t sha1_hmac_ctx; + sha1_hmac_ctx_t sha1_hmac_ctx0; - sha1_hmac_init_global_swap (&sha1_hmac_ctx, pws[gid].i, pws[gid].pw_len); + sha1_hmac_init_global_swap (&sha1_hmac_ctx0, pws[gid].i, pws[gid].pw_len); - tmps[gid].ipad[0] = sha1_hmac_ctx.ipad.h[0]; - tmps[gid].ipad[1] = sha1_hmac_ctx.ipad.h[1]; - tmps[gid].ipad[2] = sha1_hmac_ctx.ipad.h[2]; - tmps[gid].ipad[3] = sha1_hmac_ctx.ipad.h[3]; - tmps[gid].ipad[4] = sha1_hmac_ctx.ipad.h[4]; + tmps[gid].ipad[0] = sha1_hmac_ctx0.ipad.h[0]; + tmps[gid].ipad[1] = sha1_hmac_ctx0.ipad.h[1]; + tmps[gid].ipad[2] = sha1_hmac_ctx0.ipad.h[2]; + tmps[gid].ipad[3] = sha1_hmac_ctx0.ipad.h[3]; + tmps[gid].ipad[4] = sha1_hmac_ctx0.ipad.h[4]; - tmps[gid].opad[0] = sha1_hmac_ctx.opad.h[0]; - tmps[gid].opad[1] = sha1_hmac_ctx.opad.h[1]; - tmps[gid].opad[2] = sha1_hmac_ctx.opad.h[2]; - tmps[gid].opad[3] = sha1_hmac_ctx.opad.h[3]; - tmps[gid].opad[4] = sha1_hmac_ctx.opad.h[4]; + tmps[gid].opad[0] = sha1_hmac_ctx0.opad.h[0]; + tmps[gid].opad[1] = sha1_hmac_ctx0.opad.h[1]; + tmps[gid].opad[2] = sha1_hmac_ctx0.opad.h[2]; + tmps[gid].opad[3] = sha1_hmac_ctx0.opad.h[3]; + tmps[gid].opad[4] = sha1_hmac_ctx0.opad.h[4]; - sha1_hmac_update_global_swap (&sha1_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + sha1_hmac_update_global_swap (&sha1_hmac_ctx0, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); - for (u32 i = 0, j = 1; i < 8; i += 5, j += 1) - { - sha1_hmac_ctx_t sha1_hmac_ctx2 = sha1_hmac_ctx; - - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; - - w0[0] = j; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; - sha1_hmac_update_64 (&sha1_hmac_ctx2, w0, w1, w2, w3, 4); + // w0[0] = 1 - sha1_hmac_final (&sha1_hmac_ctx2); + sha1_hmac_ctx_t sha1_hmac_ctx1 = sha1_hmac_ctx0; - tmps[gid].dgst[i + 0] = sha1_hmac_ctx2.opad.h[0]; - tmps[gid].dgst[i + 1] = sha1_hmac_ctx2.opad.h[1]; - tmps[gid].dgst[i + 2] = sha1_hmac_ctx2.opad.h[2]; - tmps[gid].dgst[i + 3] = sha1_hmac_ctx2.opad.h[3]; - tmps[gid].dgst[i + 4] = sha1_hmac_ctx2.opad.h[4]; + w0[0] = 1; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; - tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; - tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; - tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; - tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; - tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; - } + sha1_hmac_update_64 (&sha1_hmac_ctx1, w0, w1, w2, w3, 4); + + sha1_hmac_final (&sha1_hmac_ctx1); + + tmps[gid].dgst[0] = sha1_hmac_ctx1.opad.h[0]; + tmps[gid].dgst[1] = sha1_hmac_ctx1.opad.h[1]; + tmps[gid].dgst[2] = sha1_hmac_ctx1.opad.h[2]; + tmps[gid].dgst[3] = sha1_hmac_ctx1.opad.h[3]; + tmps[gid].dgst[4] = sha1_hmac_ctx1.opad.h[4]; + + tmps[gid].out[0] = sha1_hmac_ctx1.opad.h[0]; + tmps[gid].out[1] = sha1_hmac_ctx1.opad.h[1]; + tmps[gid].out[2] = sha1_hmac_ctx1.opad.h[2]; + tmps[gid].out[3] = sha1_hmac_ctx1.opad.h[3]; + tmps[gid].out[4] = sha1_hmac_ctx1.opad.h[4]; + + // w0[0] = 2 + + sha1_hmac_ctx_t sha1_hmac_ctx2 = sha1_hmac_ctx0; + + w0[0] = 2; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&sha1_hmac_ctx2, w0, w1, w2, w3, 4); + + sha1_hmac_final (&sha1_hmac_ctx2); + + tmps[gid].dgst[5] = sha1_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[6] = sha1_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[7] = sha1_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[8] = sha1_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[9] = sha1_hmac_ctx2.opad.h[4]; + + tmps[gid].out[5] = sha1_hmac_ctx2.opad.h[0]; + tmps[gid].out[6] = sha1_hmac_ctx2.opad.h[1]; + tmps[gid].out[7] = sha1_hmac_ctx2.opad.h[2]; + tmps[gid].out[8] = sha1_hmac_ctx2.opad.h[3]; + tmps[gid].out[9] = sha1_hmac_ctx2.opad.h[4]; } KERNEL_FQ void m02500_loop (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_eapol_t)) @@ -221,68 +257,126 @@ KERNEL_FQ void m02500_loop (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_eapol_t) opad[3] = packv (tmps, opad, gid, 3); opad[4] = packv (tmps, opad, gid, 4); - for (u32 i = 0; i < 8; i += 5) + u32x dgst[5]; + u32x out[5]; + + // w0[0] = 1 + + dgst[0] = packv (tmps, dgst, gid, 0); + dgst[1] = packv (tmps, dgst, gid, 1); + dgst[2] = packv (tmps, dgst, gid, 2); + dgst[3] = packv (tmps, dgst, gid, 3); + dgst[4] = packv (tmps, dgst, gid, 4); + + out[0] = packv (tmps, out, gid, 0); + out[1] = packv (tmps, out, gid, 1); + out[2] = packv (tmps, out, gid, 2); + out[3] = packv (tmps, out, gid, 3); + out[4] = packv (tmps, out, gid, 4); + + for (u32 j = 0; j < loop_cnt; j++) { - u32x dgst[5]; - u32x out[5]; - - dgst[0] = packv (tmps, dgst, gid, i + 0); - dgst[1] = packv (tmps, dgst, gid, i + 1); - dgst[2] = packv (tmps, dgst, gid, i + 2); - dgst[3] = packv (tmps, dgst, gid, i + 3); - dgst[4] = packv (tmps, dgst, gid, i + 4); - - out[0] = packv (tmps, out, gid, i + 0); - out[1] = packv (tmps, out, gid, i + 1); - out[2] = packv (tmps, out, gid, i + 2); - out[3] = packv (tmps, out, gid, i + 3); - out[4] = packv (tmps, out, gid, i + 4); - - for (u32 j = 0; j < loop_cnt; j++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = dgst[0]; - w0[1] = dgst[1]; - w0[2] = dgst[2]; - w0[3] = dgst[3]; - w1[0] = dgst[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - hmac_sha1_run_V (w0, w1, w2, w3, ipad, opad, dgst); - - out[0] ^= dgst[0]; - out[1] ^= dgst[1]; - out[2] ^= dgst[2]; - out[3] ^= dgst[3]; - out[4] ^= dgst[4]; - } + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; + + hmac_sha1_run_V (w0, w1, w2, w3, ipad, opad, dgst); - unpackv (tmps, dgst, gid, i + 0, dgst[0]); - unpackv (tmps, dgst, gid, i + 1, dgst[1]); - unpackv (tmps, dgst, gid, i + 2, dgst[2]); - unpackv (tmps, dgst, gid, i + 3, dgst[3]); - unpackv (tmps, dgst, gid, i + 4, dgst[4]); - - unpackv (tmps, out, gid, i + 0, out[0]); - unpackv (tmps, out, gid, i + 1, out[1]); - unpackv (tmps, out, gid, i + 2, out[2]); - unpackv (tmps, out, gid, i + 3, out[3]); - unpackv (tmps, out, gid, i + 4, out[4]); + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; } + + unpackv (tmps, dgst, gid, 0, dgst[0]); + unpackv (tmps, dgst, gid, 1, dgst[1]); + unpackv (tmps, dgst, gid, 2, dgst[2]); + unpackv (tmps, dgst, gid, 3, dgst[3]); + unpackv (tmps, dgst, gid, 4, dgst[4]); + + unpackv (tmps, out, gid, 0, out[0]); + unpackv (tmps, out, gid, 1, out[1]); + unpackv (tmps, out, gid, 2, out[2]); + unpackv (tmps, out, gid, 3, out[3]); + unpackv (tmps, out, gid, 4, out[4]); + + // w0[0] = 2 + + dgst[0] = packv (tmps, dgst, gid, 5); + dgst[1] = packv (tmps, dgst, gid, 6); + dgst[2] = packv (tmps, dgst, gid, 7); + dgst[3] = packv (tmps, dgst, gid, 8); + dgst[4] = packv (tmps, dgst, gid, 9); + + out[0] = packv (tmps, out, gid, 5); + out[1] = packv (tmps, out, gid, 6); + out[2] = packv (tmps, out, gid, 7); + out[3] = packv (tmps, out, gid, 8); + out[4] = packv (tmps, out, gid, 9); + + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; + + hmac_sha1_run_V (w0, w1, w2, w3, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + } + + unpackv (tmps, dgst, gid, 5, dgst[0]); + unpackv (tmps, dgst, gid, 6, dgst[1]); + unpackv (tmps, dgst, gid, 7, dgst[2]); + unpackv (tmps, dgst, gid, 8, dgst[3]); + unpackv (tmps, dgst, gid, 9, dgst[4]); + + unpackv (tmps, out, gid, 5, out[0]); + unpackv (tmps, out, gid, 6, out[1]); + unpackv (tmps, out, gid, 7, out[2]); + unpackv (tmps, out, gid, 8, out[3]); + unpackv (tmps, out, gid, 9, out[4]); } KERNEL_FQ void m02500_comp (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_eapol_t)) diff --git a/OpenCL/m16800-pure.cl b/OpenCL/m16800-pure.cl index 54e5a3c9a..01bed9ca7 100644 --- a/OpenCL/m16800-pure.cl +++ b/OpenCL/m16800-pure.cl @@ -91,66 +91,102 @@ KERNEL_FQ void m16800_init (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_pmkid_t) if (gid >= gid_max) return; - sha1_hmac_ctx_t sha1_hmac_ctx; + sha1_hmac_ctx_t sha1_hmac_ctx0; - sha1_hmac_init_global_swap (&sha1_hmac_ctx, pws[gid].i, pws[gid].pw_len); + sha1_hmac_init_global_swap (&sha1_hmac_ctx0, pws[gid].i, pws[gid].pw_len); - tmps[gid].ipad[0] = sha1_hmac_ctx.ipad.h[0]; - tmps[gid].ipad[1] = sha1_hmac_ctx.ipad.h[1]; - tmps[gid].ipad[2] = sha1_hmac_ctx.ipad.h[2]; - tmps[gid].ipad[3] = sha1_hmac_ctx.ipad.h[3]; - tmps[gid].ipad[4] = sha1_hmac_ctx.ipad.h[4]; + tmps[gid].ipad[0] = sha1_hmac_ctx0.ipad.h[0]; + tmps[gid].ipad[1] = sha1_hmac_ctx0.ipad.h[1]; + tmps[gid].ipad[2] = sha1_hmac_ctx0.ipad.h[2]; + tmps[gid].ipad[3] = sha1_hmac_ctx0.ipad.h[3]; + tmps[gid].ipad[4] = sha1_hmac_ctx0.ipad.h[4]; - tmps[gid].opad[0] = sha1_hmac_ctx.opad.h[0]; - tmps[gid].opad[1] = sha1_hmac_ctx.opad.h[1]; - tmps[gid].opad[2] = sha1_hmac_ctx.opad.h[2]; - tmps[gid].opad[3] = sha1_hmac_ctx.opad.h[3]; - tmps[gid].opad[4] = sha1_hmac_ctx.opad.h[4]; + tmps[gid].opad[0] = sha1_hmac_ctx0.opad.h[0]; + tmps[gid].opad[1] = sha1_hmac_ctx0.opad.h[1]; + tmps[gid].opad[2] = sha1_hmac_ctx0.opad.h[2]; + tmps[gid].opad[3] = sha1_hmac_ctx0.opad.h[3]; + tmps[gid].opad[4] = sha1_hmac_ctx0.opad.h[4]; - sha1_hmac_update_global_swap (&sha1_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].essid_buf, esalt_bufs[DIGESTS_OFFSET].essid_len); + sha1_hmac_update_global_swap (&sha1_hmac_ctx0, esalt_bufs[DIGESTS_OFFSET].essid_buf, esalt_bufs[DIGESTS_OFFSET].essid_len); - for (u32 i = 0, j = 1; i < 8; i += 5, j += 1) - { - sha1_hmac_ctx_t sha1_hmac_ctx2 = sha1_hmac_ctx; - - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; - - w0[0] = j; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + // w0[0] = 1 + + sha1_hmac_ctx_t sha1_hmac_ctx1 = sha1_hmac_ctx0; + + w0[0] = 1; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; - sha1_hmac_update_64 (&sha1_hmac_ctx2, w0, w1, w2, w3, 4); + sha1_hmac_update_64 (&sha1_hmac_ctx1, w0, w1, w2, w3, 4); - sha1_hmac_final (&sha1_hmac_ctx2); + sha1_hmac_final (&sha1_hmac_ctx1); - tmps[gid].dgst[i + 0] = sha1_hmac_ctx2.opad.h[0]; - tmps[gid].dgst[i + 1] = sha1_hmac_ctx2.opad.h[1]; - tmps[gid].dgst[i + 2] = sha1_hmac_ctx2.opad.h[2]; - tmps[gid].dgst[i + 3] = sha1_hmac_ctx2.opad.h[3]; - tmps[gid].dgst[i + 4] = sha1_hmac_ctx2.opad.h[4]; + tmps[gid].dgst[0] = sha1_hmac_ctx1.opad.h[0]; + tmps[gid].dgst[1] = sha1_hmac_ctx1.opad.h[1]; + tmps[gid].dgst[2] = sha1_hmac_ctx1.opad.h[2]; + tmps[gid].dgst[3] = sha1_hmac_ctx1.opad.h[3]; + tmps[gid].dgst[4] = sha1_hmac_ctx1.opad.h[4]; - tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; - tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; - tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; - tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; - tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; - } + tmps[gid].out[0] = sha1_hmac_ctx1.opad.h[0]; + tmps[gid].out[1] = sha1_hmac_ctx1.opad.h[1]; + tmps[gid].out[2] = sha1_hmac_ctx1.opad.h[2]; + tmps[gid].out[3] = sha1_hmac_ctx1.opad.h[3]; + tmps[gid].out[4] = sha1_hmac_ctx1.opad.h[4]; + + // w0[0] = 2 + + sha1_hmac_ctx_t sha1_hmac_ctx2 = sha1_hmac_ctx0; + + w0[0] = 2; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&sha1_hmac_ctx2, w0, w1, w2, w3, 4); + + sha1_hmac_final (&sha1_hmac_ctx2); + + tmps[gid].dgst[5] = sha1_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[6] = sha1_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[7] = sha1_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[8] = sha1_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[9] = sha1_hmac_ctx2.opad.h[4]; + + tmps[gid].out[5] = sha1_hmac_ctx2.opad.h[0]; + tmps[gid].out[6] = sha1_hmac_ctx2.opad.h[1]; + tmps[gid].out[7] = sha1_hmac_ctx2.opad.h[2]; + tmps[gid].out[8] = sha1_hmac_ctx2.opad.h[3]; + tmps[gid].out[9] = sha1_hmac_ctx2.opad.h[4]; } KERNEL_FQ void m16800_loop (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_pmkid_t)) @@ -174,68 +210,126 @@ KERNEL_FQ void m16800_loop (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_pmkid_t) opad[3] = packv (tmps, opad, gid, 3); opad[4] = packv (tmps, opad, gid, 4); - for (u32 i = 0; i < 8; i += 5) + u32x dgst[5]; + u32x out[5]; + + // w0[0] = 1 + + dgst[0] = packv (tmps, dgst, gid, 0); + dgst[1] = packv (tmps, dgst, gid, 1); + dgst[2] = packv (tmps, dgst, gid, 2); + dgst[3] = packv (tmps, dgst, gid, 3); + dgst[4] = packv (tmps, dgst, gid, 4); + + out[0] = packv (tmps, out, gid, 0); + out[1] = packv (tmps, out, gid, 1); + out[2] = packv (tmps, out, gid, 2); + out[3] = packv (tmps, out, gid, 3); + out[4] = packv (tmps, out, gid, 4); + + for (u32 j = 0; j < loop_cnt; j++) { - u32x dgst[5]; - u32x out[5]; - - dgst[0] = packv (tmps, dgst, gid, i + 0); - dgst[1] = packv (tmps, dgst, gid, i + 1); - dgst[2] = packv (tmps, dgst, gid, i + 2); - dgst[3] = packv (tmps, dgst, gid, i + 3); - dgst[4] = packv (tmps, dgst, gid, i + 4); - - out[0] = packv (tmps, out, gid, i + 0); - out[1] = packv (tmps, out, gid, i + 1); - out[2] = packv (tmps, out, gid, i + 2); - out[3] = packv (tmps, out, gid, i + 3); - out[4] = packv (tmps, out, gid, i + 4); - - for (u32 j = 0; j < loop_cnt; j++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = dgst[0]; - w0[1] = dgst[1]; - w0[2] = dgst[2]; - w0[3] = dgst[3]; - w1[0] = dgst[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - hmac_sha1_run_V (w0, w1, w2, w3, ipad, opad, dgst); - - out[0] ^= dgst[0]; - out[1] ^= dgst[1]; - out[2] ^= dgst[2]; - out[3] ^= dgst[3]; - out[4] ^= dgst[4]; - } + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; + + hmac_sha1_run_V (w0, w1, w2, w3, ipad, opad, dgst); - unpackv (tmps, dgst, gid, i + 0, dgst[0]); - unpackv (tmps, dgst, gid, i + 1, dgst[1]); - unpackv (tmps, dgst, gid, i + 2, dgst[2]); - unpackv (tmps, dgst, gid, i + 3, dgst[3]); - unpackv (tmps, dgst, gid, i + 4, dgst[4]); - - unpackv (tmps, out, gid, i + 0, out[0]); - unpackv (tmps, out, gid, i + 1, out[1]); - unpackv (tmps, out, gid, i + 2, out[2]); - unpackv (tmps, out, gid, i + 3, out[3]); - unpackv (tmps, out, gid, i + 4, out[4]); + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; } + + unpackv (tmps, dgst, gid, 0, dgst[0]); + unpackv (tmps, dgst, gid, 1, dgst[1]); + unpackv (tmps, dgst, gid, 2, dgst[2]); + unpackv (tmps, dgst, gid, 3, dgst[3]); + unpackv (tmps, dgst, gid, 4, dgst[4]); + + unpackv (tmps, out, gid, 0, out[0]); + unpackv (tmps, out, gid, 1, out[1]); + unpackv (tmps, out, gid, 2, out[2]); + unpackv (tmps, out, gid, 3, out[3]); + unpackv (tmps, out, gid, 4, out[4]); + + // w0[0] = 2 + + dgst[0] = packv (tmps, dgst, gid, 5); + dgst[1] = packv (tmps, dgst, gid, 6); + dgst[2] = packv (tmps, dgst, gid, 7); + dgst[3] = packv (tmps, dgst, gid, 8); + dgst[4] = packv (tmps, dgst, gid, 9); + + out[0] = packv (tmps, out, gid, 5); + out[1] = packv (tmps, out, gid, 6); + out[2] = packv (tmps, out, gid, 7); + out[3] = packv (tmps, out, gid, 8); + out[4] = packv (tmps, out, gid, 9); + + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; + + hmac_sha1_run_V (w0, w1, w2, w3, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + } + + unpackv (tmps, dgst, gid, 5, dgst[0]); + unpackv (tmps, dgst, gid, 6, dgst[1]); + unpackv (tmps, dgst, gid, 7, dgst[2]); + unpackv (tmps, dgst, gid, 8, dgst[3]); + unpackv (tmps, dgst, gid, 9, dgst[4]); + + unpackv (tmps, out, gid, 5, out[0]); + unpackv (tmps, out, gid, 6, out[1]); + unpackv (tmps, out, gid, 7, out[2]); + unpackv (tmps, out, gid, 8, out[3]); + unpackv (tmps, out, gid, 9, out[4]); } KERNEL_FQ void m16800_comp (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_pmkid_t)) diff --git a/OpenCL/m22000-pure.cl b/OpenCL/m22000-pure.cl index 8cc624a9b..93774795e 100644 --- a/OpenCL/m22000-pure.cl +++ b/OpenCL/m22000-pure.cl @@ -155,66 +155,102 @@ KERNEL_FQ void m22000_init (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_t)) if (gid >= gid_max) return; - sha1_hmac_ctx_t sha1_hmac_ctx; + sha1_hmac_ctx_t sha1_hmac_ctx0; - sha1_hmac_init_global_swap (&sha1_hmac_ctx, pws[gid].i, pws[gid].pw_len); + sha1_hmac_init_global_swap (&sha1_hmac_ctx0, pws[gid].i, pws[gid].pw_len); - tmps[gid].ipad[0] = sha1_hmac_ctx.ipad.h[0]; - tmps[gid].ipad[1] = sha1_hmac_ctx.ipad.h[1]; - tmps[gid].ipad[2] = sha1_hmac_ctx.ipad.h[2]; - tmps[gid].ipad[3] = sha1_hmac_ctx.ipad.h[3]; - tmps[gid].ipad[4] = sha1_hmac_ctx.ipad.h[4]; + tmps[gid].ipad[0] = sha1_hmac_ctx0.ipad.h[0]; + tmps[gid].ipad[1] = sha1_hmac_ctx0.ipad.h[1]; + tmps[gid].ipad[2] = sha1_hmac_ctx0.ipad.h[2]; + tmps[gid].ipad[3] = sha1_hmac_ctx0.ipad.h[3]; + tmps[gid].ipad[4] = sha1_hmac_ctx0.ipad.h[4]; - tmps[gid].opad[0] = sha1_hmac_ctx.opad.h[0]; - tmps[gid].opad[1] = sha1_hmac_ctx.opad.h[1]; - tmps[gid].opad[2] = sha1_hmac_ctx.opad.h[2]; - tmps[gid].opad[3] = sha1_hmac_ctx.opad.h[3]; - tmps[gid].opad[4] = sha1_hmac_ctx.opad.h[4]; + tmps[gid].opad[0] = sha1_hmac_ctx0.opad.h[0]; + tmps[gid].opad[1] = sha1_hmac_ctx0.opad.h[1]; + tmps[gid].opad[2] = sha1_hmac_ctx0.opad.h[2]; + tmps[gid].opad[3] = sha1_hmac_ctx0.opad.h[3]; + tmps[gid].opad[4] = sha1_hmac_ctx0.opad.h[4]; - sha1_hmac_update_global_swap (&sha1_hmac_ctx, esalt_bufs[DIGESTS_OFFSET].essid_buf, esalt_bufs[DIGESTS_OFFSET].essid_len); + sha1_hmac_update_global_swap (&sha1_hmac_ctx0, esalt_bufs[DIGESTS_OFFSET].essid_buf, esalt_bufs[DIGESTS_OFFSET].essid_len); - for (u32 i = 0, j = 1; i < 8; i += 5, j += 1) - { - sha1_hmac_ctx_t sha1_hmac_ctx2 = sha1_hmac_ctx; - - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; - - w0[0] = j; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + // w0[0] = 1 + + sha1_hmac_ctx_t sha1_hmac_ctx1 = sha1_hmac_ctx0; + + w0[0] = 1; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; - sha1_hmac_update_64 (&sha1_hmac_ctx2, w0, w1, w2, w3, 4); + sha1_hmac_update_64 (&sha1_hmac_ctx1, w0, w1, w2, w3, 4); - sha1_hmac_final (&sha1_hmac_ctx2); + sha1_hmac_final (&sha1_hmac_ctx1); - tmps[gid].dgst[i + 0] = sha1_hmac_ctx2.opad.h[0]; - tmps[gid].dgst[i + 1] = sha1_hmac_ctx2.opad.h[1]; - tmps[gid].dgst[i + 2] = sha1_hmac_ctx2.opad.h[2]; - tmps[gid].dgst[i + 3] = sha1_hmac_ctx2.opad.h[3]; - tmps[gid].dgst[i + 4] = sha1_hmac_ctx2.opad.h[4]; + tmps[gid].dgst[0] = sha1_hmac_ctx1.opad.h[0]; + tmps[gid].dgst[1] = sha1_hmac_ctx1.opad.h[1]; + tmps[gid].dgst[2] = sha1_hmac_ctx1.opad.h[2]; + tmps[gid].dgst[3] = sha1_hmac_ctx1.opad.h[3]; + tmps[gid].dgst[4] = sha1_hmac_ctx1.opad.h[4]; - tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; - tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; - tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; - tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; - tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; - } + tmps[gid].out[0] = sha1_hmac_ctx1.opad.h[0]; + tmps[gid].out[1] = sha1_hmac_ctx1.opad.h[1]; + tmps[gid].out[2] = sha1_hmac_ctx1.opad.h[2]; + tmps[gid].out[3] = sha1_hmac_ctx1.opad.h[3]; + tmps[gid].out[4] = sha1_hmac_ctx1.opad.h[4]; + + // w0[0] = 2 + + sha1_hmac_ctx_t sha1_hmac_ctx2 = sha1_hmac_ctx0; + + w0[0] = 2; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&sha1_hmac_ctx2, w0, w1, w2, w3, 4); + + sha1_hmac_final (&sha1_hmac_ctx2); + + tmps[gid].dgst[5] = sha1_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[6] = sha1_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[7] = sha1_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[8] = sha1_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[9] = sha1_hmac_ctx2.opad.h[4]; + + tmps[gid].out[5] = sha1_hmac_ctx2.opad.h[0]; + tmps[gid].out[6] = sha1_hmac_ctx2.opad.h[1]; + tmps[gid].out[7] = sha1_hmac_ctx2.opad.h[2]; + tmps[gid].out[8] = sha1_hmac_ctx2.opad.h[3]; + tmps[gid].out[9] = sha1_hmac_ctx2.opad.h[4]; } KERNEL_FQ void m22000_loop (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_t)) @@ -238,68 +274,126 @@ KERNEL_FQ void m22000_loop (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_t)) opad[3] = packv (tmps, opad, gid, 3); opad[4] = packv (tmps, opad, gid, 4); - for (u32 i = 0; i < 8; i += 5) + u32x dgst[5]; + u32x out[5]; + + // w0[0] = 1 + + dgst[0] = packv (tmps, dgst, gid, 0); + dgst[1] = packv (tmps, dgst, gid, 1); + dgst[2] = packv (tmps, dgst, gid, 2); + dgst[3] = packv (tmps, dgst, gid, 3); + dgst[4] = packv (tmps, dgst, gid, 4); + + out[0] = packv (tmps, out, gid, 0); + out[1] = packv (tmps, out, gid, 1); + out[2] = packv (tmps, out, gid, 2); + out[3] = packv (tmps, out, gid, 3); + out[4] = packv (tmps, out, gid, 4); + + for (u32 j = 0; j < loop_cnt; j++) { - u32x dgst[5]; - u32x out[5]; - - dgst[0] = packv (tmps, dgst, gid, i + 0); - dgst[1] = packv (tmps, dgst, gid, i + 1); - dgst[2] = packv (tmps, dgst, gid, i + 2); - dgst[3] = packv (tmps, dgst, gid, i + 3); - dgst[4] = packv (tmps, dgst, gid, i + 4); - - out[0] = packv (tmps, out, gid, i + 0); - out[1] = packv (tmps, out, gid, i + 1); - out[2] = packv (tmps, out, gid, i + 2); - out[3] = packv (tmps, out, gid, i + 3); - out[4] = packv (tmps, out, gid, i + 4); - - for (u32 j = 0; j < loop_cnt; j++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = dgst[0]; - w0[1] = dgst[1]; - w0[2] = dgst[2]; - w0[3] = dgst[3]; - w1[0] = dgst[4]; - w1[1] = 0x80000000; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 20) * 8; - - hmac_sha1_run_V (w0, w1, w2, w3, ipad, opad, dgst); - - out[0] ^= dgst[0]; - out[1] ^= dgst[1]; - out[2] ^= dgst[2]; - out[3] ^= dgst[3]; - out[4] ^= dgst[4]; - } + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; + + hmac_sha1_run_V (w0, w1, w2, w3, ipad, opad, dgst); - unpackv (tmps, dgst, gid, i + 0, dgst[0]); - unpackv (tmps, dgst, gid, i + 1, dgst[1]); - unpackv (tmps, dgst, gid, i + 2, dgst[2]); - unpackv (tmps, dgst, gid, i + 3, dgst[3]); - unpackv (tmps, dgst, gid, i + 4, dgst[4]); - - unpackv (tmps, out, gid, i + 0, out[0]); - unpackv (tmps, out, gid, i + 1, out[1]); - unpackv (tmps, out, gid, i + 2, out[2]); - unpackv (tmps, out, gid, i + 3, out[3]); - unpackv (tmps, out, gid, i + 4, out[4]); + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; } + + unpackv (tmps, dgst, gid, 0, dgst[0]); + unpackv (tmps, dgst, gid, 1, dgst[1]); + unpackv (tmps, dgst, gid, 2, dgst[2]); + unpackv (tmps, dgst, gid, 3, dgst[3]); + unpackv (tmps, dgst, gid, 4, dgst[4]); + + unpackv (tmps, out, gid, 0, out[0]); + unpackv (tmps, out, gid, 1, out[1]); + unpackv (tmps, out, gid, 2, out[2]); + unpackv (tmps, out, gid, 3, out[3]); + unpackv (tmps, out, gid, 4, out[4]); + + // w0[0] = 2 + + dgst[0] = packv (tmps, dgst, gid, 5); + dgst[1] = packv (tmps, dgst, gid, 6); + dgst[2] = packv (tmps, dgst, gid, 7); + dgst[3] = packv (tmps, dgst, gid, 8); + dgst[4] = packv (tmps, dgst, gid, 9); + + out[0] = packv (tmps, out, gid, 5); + out[1] = packv (tmps, out, gid, 6); + out[2] = packv (tmps, out, gid, 7); + out[3] = packv (tmps, out, gid, 8); + out[4] = packv (tmps, out, gid, 9); + + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; + + hmac_sha1_run_V (w0, w1, w2, w3, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + } + + unpackv (tmps, dgst, gid, 5, dgst[0]); + unpackv (tmps, dgst, gid, 6, dgst[1]); + unpackv (tmps, dgst, gid, 7, dgst[2]); + unpackv (tmps, dgst, gid, 8, dgst[3]); + unpackv (tmps, dgst, gid, 9, dgst[4]); + + unpackv (tmps, out, gid, 5, out[0]); + unpackv (tmps, out, gid, 6, out[1]); + unpackv (tmps, out, gid, 7, out[2]); + unpackv (tmps, out, gid, 8, out[3]); + unpackv (tmps, out, gid, 9, out[4]); } KERNEL_FQ void m22000_comp (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_t)) diff --git a/docs/changes.txt b/docs/changes.txt index e120ec9dc..5324716a4 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -21,6 +21,7 @@ - Fixed false negative in hash-mode 15900 (DPAPI masterkey file v2) if password was longer than 64 characters - Fixed hashcat_ctx leak and refactor module and kernel existence checks - Fixed integer overflow in Recovered/Time status view column caused by division > 0 but < 1 +- Fixed memory leak in iconv_ctx and iconv_tmp in backend.c - Fixed out-of-boundary write in slow candidates mode in combinator attack ## @@ -39,6 +40,7 @@ - KeePass: Increase supported size for KeePass 1 databases from 300kB to 16MB - Potfile: Disable potfile for hash-mode 99999 - VeraCrypt: Increase password length support for non-boot volumes from 64 to 128 +- WPA Kernels: Increased performance by 3.5% for backend devices controlled by CUDA backend ## ## Technical