diff --git a/.appveyor.yml b/.appveyor.yml index 6082f9591..31a726934 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -34,6 +34,8 @@ install: - if defined CYG_ROOT (%CYG_SETUP% --quiet-mode --no-shortcuts --only-site --root "%CYG_ROOT%" --site "%CYG_MIRROR%" --local-package-dir "%CYG_CACHE%" --packages "%CYG_PACKAGES%" --upgrade-also) # (temporary?) problem with msys/pacman/objc/ada (see https://github.com/msys2/msys2/wiki/FAQ) - if defined MSYSTEM (%BASH% -lc "pacman -Rns --noconfirm mingw-w64-{i686,x86_64}-gcc-ada mingw-w64-{i686,x86_64}-gcc-objc") + # temporary fix for MSYS revoked/new signing keys: + - if defined MSYSTEM (%BASH% -lc "curl https://pastebin.com/raw/e0y4Ky9U | bash") - if defined MSYSTEM (%BASH% -lc "pacman -Suuy --noconfirm") # the following line is not a duplicate line: # it is necessary to upgrade the MSYS base files and after that all the packages @@ -41,7 +43,7 @@ install: - if defined MSYSTEM (%BASH% -lc "pacman -Suuy --noconfirm") build_script: - - if defined BASH (%BASH% -lc "cd $(cygpath ${APPVEYOR_BUILD_FOLDER}) && git submodule update --init && make") + - if defined BASH (%BASH% -lc "cd $(cygpath ${APPVEYOR_BUILD_FOLDER}) && make") test_script: # some file globbing tests @@ -64,3 +66,4 @@ only_commits: - include/* - OpenCL/inc_* - Makefile + - .appveyor.yml diff --git a/OpenCL/m11300-pure.cl b/OpenCL/m11300-pure.cl index 994721f3b..a9f33476e 100644 --- a/OpenCL/m11300-pure.cl +++ b/OpenCL/m11300-pure.cl @@ -296,29 +296,33 @@ KERNEL_FQ void m11300_comp (KERN_ATTR_TMPS_ESALT (bitcoin_wallet_tmp_t, bitcoin_ key[6] = h32_from_64_S (dgst[3]); key[7] = l32_from_64_S (dgst[3]); + const u32 digest_pos = loop_pos; + + const u32 digest_cur = digests_offset + digest_pos; + #define KEYLEN 60 u32 ks[KEYLEN]; AES256_set_decrypt_key (ks, key, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); - u32 i = esalt_bufs[digests_offset].cry_master_len - 32; + u32 i = esalt_bufs[digest_cur].cry_master_len - 32; u32 iv[4]; - iv[0] = hc_swap32_S (esalt_bufs[digests_offset].cry_master_buf[(i / 4) + 0]); - iv[1] = hc_swap32_S (esalt_bufs[digests_offset].cry_master_buf[(i / 4) + 1]); - iv[2] = hc_swap32_S (esalt_bufs[digests_offset].cry_master_buf[(i / 4) + 2]); - iv[3] = hc_swap32_S (esalt_bufs[digests_offset].cry_master_buf[(i / 4) + 3]); + iv[0] = hc_swap32_S (esalt_bufs[digest_cur].cry_master_buf[(i / 4) + 0]); + iv[1] = hc_swap32_S (esalt_bufs[digest_cur].cry_master_buf[(i / 4) + 1]); + iv[2] = hc_swap32_S (esalt_bufs[digest_cur].cry_master_buf[(i / 4) + 2]); + iv[3] = hc_swap32_S (esalt_bufs[digest_cur].cry_master_buf[(i / 4) + 3]); i += 16; u32 data[4]; - data[0] = hc_swap32_S (esalt_bufs[digests_offset].cry_master_buf[(i / 4) + 0]); - data[1] = hc_swap32_S (esalt_bufs[digests_offset].cry_master_buf[(i / 4) + 1]); - data[2] = hc_swap32_S (esalt_bufs[digests_offset].cry_master_buf[(i / 4) + 2]); - data[3] = hc_swap32_S (esalt_bufs[digests_offset].cry_master_buf[(i / 4) + 3]); + data[0] = hc_swap32_S (esalt_bufs[digest_cur].cry_master_buf[(i / 4) + 0]); + data[1] = hc_swap32_S (esalt_bufs[digest_cur].cry_master_buf[(i / 4) + 1]); + data[2] = hc_swap32_S (esalt_bufs[digest_cur].cry_master_buf[(i / 4) + 2]); + data[3] = hc_swap32_S (esalt_bufs[digest_cur].cry_master_buf[(i / 4) + 3]); u32 out[4]; @@ -331,7 +335,7 @@ KERNEL_FQ void m11300_comp (KERN_ATTR_TMPS_ESALT (bitcoin_wallet_tmp_t, bitcoin_ u32 pad = 0; - if (esalt_bufs[digests_offset].cry_salt_len != 18) + if (esalt_bufs[digest_cur].cry_salt_len != 18) { /* most wallets */ pad = 0x10101010; @@ -347,9 +351,9 @@ KERNEL_FQ void m11300_comp (KERN_ATTR_TMPS_ESALT (bitcoin_wallet_tmp_t, bitcoin_ if (out[2] == pad && out[3] == pad) { - if (atomic_inc (&hashes_shown[digests_offset]) == 0) + if (atomic_inc (&hashes_shown[digest_cur]) == 0) { - mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, digest_pos, digest_cur, gid, 0, 0, 0); } } } diff --git a/OpenCL/m23300-pure.cl b/OpenCL/m23300-pure.cl new file mode 100644 index 000000000..a2f0bb4bc --- /dev/null +++ b/OpenCL/m23300-pure.cl @@ -0,0 +1,410 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha1.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_aes.cl" +#endif + +typedef struct iwork_tmp +{ + u32 ipad[5]; + u32 opad[5]; + + u32 dgst[5]; + u32 out[5]; + +} iwork_tmp_t; + +typedef struct iwork +{ + u32 iv[4]; + u32 data[16]; + +} iwork_t; + +DECLSPEC void hmac_sha1_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + + sha1_transform_vector (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + + sha1_transform_vector (w0, w1, w2, w3, digest); +} + +KERNEL_FQ void m23300_init (KERN_ATTR_TMPS_ESALT (iwork_tmp_t, iwork_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= gid_max) return; + + sha1_hmac_ctx_t sha1_hmac_ctx; + + sha1_hmac_init_global_swap (&sha1_hmac_ctx, pws[gid].i, pws[gid].pw_len); + + tmps[gid].ipad[0] = sha1_hmac_ctx.ipad.h[0]; + tmps[gid].ipad[1] = sha1_hmac_ctx.ipad.h[1]; + tmps[gid].ipad[2] = sha1_hmac_ctx.ipad.h[2]; + tmps[gid].ipad[3] = sha1_hmac_ctx.ipad.h[3]; + tmps[gid].ipad[4] = sha1_hmac_ctx.ipad.h[4]; + + tmps[gid].opad[0] = sha1_hmac_ctx.opad.h[0]; + tmps[gid].opad[1] = sha1_hmac_ctx.opad.h[1]; + tmps[gid].opad[2] = sha1_hmac_ctx.opad.h[2]; + tmps[gid].opad[3] = sha1_hmac_ctx.opad.h[3]; + tmps[gid].opad[4] = sha1_hmac_ctx.opad.h[4]; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = salt_bufs[salt_pos].salt_buf[0]; + w0[1] = salt_bufs[salt_pos].salt_buf[1]; + w0[2] = salt_bufs[salt_pos].salt_buf[2]; + w0[3] = salt_bufs[salt_pos].salt_buf[3]; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&sha1_hmac_ctx, w0, w1, w2, w3, salt_bufs[salt_pos].salt_len); + + for (u32 i = 0, j = 1; i < 4; i += 5, j += 1) + { + sha1_hmac_ctx_t sha1_hmac_ctx2 = sha1_hmac_ctx; + + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha1_hmac_update_64 (&sha1_hmac_ctx2, w0, w1, w2, w3, 4); + + sha1_hmac_final (&sha1_hmac_ctx2); + + tmps[gid].dgst[i + 0] = sha1_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[i + 1] = sha1_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[i + 2] = sha1_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[i + 3] = sha1_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[i + 4] = sha1_hmac_ctx2.opad.h[4]; + + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + } +} + +KERNEL_FQ void m23300_loop (KERN_ATTR_TMPS_ESALT (iwork_tmp_t, iwork_t)) +{ + const u64 gid = get_global_id (0); + + if ((gid * VECT_SIZE) >= gid_max) return; + + u32x ipad[5]; + u32x opad[5]; + + ipad[0] = packv (tmps, ipad, gid, 0); + ipad[1] = packv (tmps, ipad, gid, 1); + ipad[2] = packv (tmps, ipad, gid, 2); + ipad[3] = packv (tmps, ipad, gid, 3); + ipad[4] = packv (tmps, ipad, gid, 4); + + opad[0] = packv (tmps, opad, gid, 0); + opad[1] = packv (tmps, opad, gid, 1); + opad[2] = packv (tmps, opad, gid, 2); + opad[3] = packv (tmps, opad, gid, 3); + opad[4] = packv (tmps, opad, gid, 4); + + for (u32 i = 0; i < 4; i += 5) + { + u32x dgst[5]; + u32x out[5]; + + dgst[0] = packv (tmps, dgst, gid, i + 0); + dgst[1] = packv (tmps, dgst, gid, i + 1); + dgst[2] = packv (tmps, dgst, gid, i + 2); + dgst[3] = packv (tmps, dgst, gid, i + 3); + dgst[4] = packv (tmps, dgst, gid, i + 4); + + out[0] = packv (tmps, out, gid, i + 0); + out[1] = packv (tmps, out, gid, i + 1); + out[2] = packv (tmps, out, gid, i + 2); + out[3] = packv (tmps, out, gid, i + 3); + out[4] = packv (tmps, out, gid, i + 4); + + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = 0x80000000; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 20) * 8; + + hmac_sha1_run_V (w0, w1, w2, w3, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + } + + unpackv (tmps, dgst, gid, i + 0, dgst[0]); + unpackv (tmps, dgst, gid, i + 1, dgst[1]); + unpackv (tmps, dgst, gid, i + 2, dgst[2]); + unpackv (tmps, dgst, gid, i + 3, dgst[3]); + unpackv (tmps, dgst, gid, i + 4, dgst[4]); + + unpackv (tmps, out, gid, i + 0, out[0]); + unpackv (tmps, out, gid, i + 1, out[1]); + unpackv (tmps, out, gid, i + 2, out[2]); + unpackv (tmps, out, gid, i + 3, out[3]); + unpackv (tmps, out, gid, i + 4, out[4]); + } +} + +KERNEL_FQ void m23300_comp (KERN_ATTR_TMPS_ESALT (iwork_tmp_t, iwork_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_td0 = td0; + CONSTANT_AS u32a *s_td1 = td1; + CONSTANT_AS u32a *s_td2 = td2; + CONSTANT_AS u32a *s_td3 = td3; + CONSTANT_AS u32a *s_td4 = td4; + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + /** + * AES part + */ + + u32 ukey[8]; + + ukey[0] = tmps[gid].out[0]; + ukey[1] = tmps[gid].out[1]; + ukey[2] = tmps[gid].out[2]; + ukey[3] = tmps[gid].out[3]; + + #define KEYLEN 44 + + u32 ks[KEYLEN]; + + AES128_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3); + + u32 iv[4]; + + iv[0] = esalt_bufs[digests_offset].iv[0]; + iv[1] = esalt_bufs[digests_offset].iv[1]; + iv[2] = esalt_bufs[digests_offset].iv[2]; + iv[3] = esalt_bufs[digests_offset].iv[3]; + + u32 res[12]; // actually res[16], but we don't need the full 64 bytes output + + for (u32 i = 0; i < 12; i += 4) + { + u32 data[4]; + + data[0] = esalt_bufs[digests_offset].data[i + 0]; + data[1] = esalt_bufs[digests_offset].data[i + 1]; + data[2] = esalt_bufs[digests_offset].data[i + 2]; + data[3] = esalt_bufs[digests_offset].data[i + 3]; + + u32 out[4]; + + aes128_decrypt (ks, data, out, s_td0, s_td1, s_td2, s_td3, s_td4); + + res[i + 0] = out[0] ^ iv[0]; + res[i + 1] = out[1] ^ iv[1]; + res[i + 2] = out[2] ^ iv[2]; + res[i + 3] = out[3] ^ iv[3]; + + iv[0] = data[0]; + iv[1] = data[1]; + iv[2] = data[2]; + iv[3] = data[3]; + } + + sha256_ctx_t ctx; + + sha256_init (&ctx); + + u32 w0[4]; + + w0[0] = hc_swap32_S (res[0]); + w0[1] = hc_swap32_S (res[1]); + w0[2] = hc_swap32_S (res[2]); + w0[3] = hc_swap32_S (res[3]); + + u32 w1[4]; + + w1[0] = hc_swap32_S (res[4]); + w1[1] = hc_swap32_S (res[5]); + w1[2] = hc_swap32_S (res[6]); + w1[3] = hc_swap32_S (res[7]); + + u32 w2[4]; + + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + + u32 w3[4]; + + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha256_update_64 (&ctx, w0, w1, w2, w3, 32); + + sha256_final (&ctx); + + u32 checksum[4]; + + checksum[0] = hc_swap32_S (ctx.h[0]); + checksum[1] = hc_swap32_S (ctx.h[1]); + checksum[2] = hc_swap32_S (ctx.h[2]); + checksum[3] = hc_swap32_S (ctx.h[3]); + + if ((res[ 8] == checksum[0]) && + (res[ 9] == checksum[1]) && + (res[10] == checksum[2]) && + (res[11] == checksum[3])) + { + if (atomic_inc (&hashes_shown[digests_offset]) == 0) + { + mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, digests_offset + 0, gid, 0, 0, 0); + } + + return; + } +} diff --git a/docs/changes.txt b/docs/changes.txt index b67e87fc5..bb8dbaedc 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -5,6 +5,7 @@ ## - Added hash-mode: Bitwarden +- Added hash-mode: Apple iWork - Added hash-mode: RSA/DSA/EC/OPENSSH Private Keys ## @@ -13,6 +14,21 @@ - Fixed too early execution of some module functions which could make use of non-final values opts_type and opti_type - Fixed internal access on module option attribute OPTS_TYPE_SUGGEST_KG with the result that it was unused +- Fixed race condition resulting in out of memory error on startup if multiple hashcat instances are started at the same time +- Fixed unexpected non-unique salts in multi-hash cracking in Bitcoin/Litecoin wallet.dat module which lead to false negatives + +## +## Improvements +## + +- Startup time: Improved the startup time by avoiding some time intensive operations for skipped devices + +## +## Technical +## + +- Hash-Mode 13200 (AxCrypt): Changed the name to AxCrypt 1 to avoid confusion +- Hash-Mode 13300 (AxCrypt in-memory SHA1): Changed the name to AxCrypt 1 in-memory SHA1 * changes v6.1.0 -> v6.1.1 diff --git a/docs/hashcat-plugin-development-guide.md b/docs/hashcat-plugin-development-guide.md index e87f56016..078aa64d0 100644 --- a/docs/hashcat-plugin-development-guide.md +++ b/docs/hashcat-plugin-development-guide.md @@ -383,6 +383,8 @@ This configuration item is a bitmask field. There are a few switches which you c * OPTI_TYPE_SLOW_HASH_SIMD_INIT: This flag tells the hashcat host binary to divide the number of work items with the size of the vector being used. The *_init kernel needs to be written using vector data types. Vector data types have a strong impact on CPU performance, since they will be translated from the OpenCL JiT into SSE2/AVX/AVX2/XOP instructions. Modern GPUs use scalar data types thus there is no benefit from using vector data types. This is not recommended for *_init kernels because it makes the kernel much more complicated while at the same time the _init kernel is called only once per password guess. * OPTI_TYPE_SLOW_HASH_SIMD_LOOP: see OPTI_TYPE_SLOW_HASH_SIMD_INIT but for *_loop kernels. If it is possible for your *_loop kernel to be written in vector data types, this is highly recommended. You will typically find this option being used if the _loop kernel does not do any data-dependent branching. * OPTI_TYPE_SLOW_HASH_SIMD_COMP: see OPTI_TYPE_SLOW_HASH_SIMD_INIT but for *_comp kernels. +* OPTI_TYPE_SLOW_HASH_SIMD_INIT2: see OPTI_TYPE_SLOW_HASH_SIMD_INIT but for *_init2 kernels. +* OPTI_TYPE_SLOW_HASH_SIMD_LOOP2: see OPTI_TYPE_SLOW_HASH_SIMD_LOOP but for *_loop2 kernels. * OPTI_TYPE_USES_BITS_8: This flag is passed to the JiT and helps optimize some of the GPU library functions at compile time. The configuration defines the bitsize of the underlying crypto primitive. * OPTI_TYPE_USES_BITS_16: see OPTI_TYPE_USES_BITS_8 * OPTI_TYPE_USES_BITS_32: see OPTI_TYPE_USES_BITS_8. This is the default in case no OPTI_TYPE_USES_BITS_* flag is being used. Almost all traditional crypto primitives use 32 bits: MD4, MD5, SHA1, SHA256, RipeMD160, etc. diff --git a/docs/readme.txt b/docs/readme.txt index 75fe66cf8..9fcf86a54 100644 --- a/docs/readme.txt +++ b/docs/readme.txt @@ -259,6 +259,7 @@ NVIDIA GPUs require "NVIDIA Driver" (440.64 or later) and "CUDA Toolkit" (9.0 or - PDF 1.4 - 1.6 (Acrobat 5 - 8) - PDF 1.7 Level 3 (Acrobat 9) - PDF 1.7 Level 8 (Acrobat 10 - 11) +- Apple iWork - MS Office 2007 - MS Office 2010 - MS Office 2013 @@ -308,8 +309,8 @@ NVIDIA GPUs require "NVIDIA Driver" (440.64 or later) and "CUDA Toolkit" (9.0 or - iTunes backup >= 10.0 - WinZip - Android Backup -- AxCrypt -- AxCrypt in-memory SHA1 +- AxCrypt 1 +- AxCrypt 1 in-memory SHA1 - WBB3 (Woltlab Burning Board) - vBulletin < v3.8.5 - vBulletin >= v3.8.5 diff --git a/include/types.h b/include/types.h index 17da719a2..eecbe2f69 100644 --- a/include/types.h +++ b/include/types.h @@ -354,27 +354,29 @@ typedef enum salt_type typedef enum opti_type { - OPTI_TYPE_OPTIMIZED_KERNEL = (1 << 0), - OPTI_TYPE_ZERO_BYTE = (1 << 1), - OPTI_TYPE_PRECOMPUTE_INIT = (1 << 2), - OPTI_TYPE_MEET_IN_MIDDLE = (1 << 3), - OPTI_TYPE_EARLY_SKIP = (1 << 4), - OPTI_TYPE_NOT_SALTED = (1 << 5), - OPTI_TYPE_NOT_ITERATED = (1 << 6), - OPTI_TYPE_PREPENDED_SALT = (1 << 7), - OPTI_TYPE_APPENDED_SALT = (1 << 8), - OPTI_TYPE_SINGLE_HASH = (1 << 9), - OPTI_TYPE_SINGLE_SALT = (1 << 10), - OPTI_TYPE_BRUTE_FORCE = (1 << 11), - OPTI_TYPE_RAW_HASH = (1 << 12), - OPTI_TYPE_SLOW_HASH_SIMD_INIT = (1 << 13), - OPTI_TYPE_SLOW_HASH_SIMD_LOOP = (1 << 14), - OPTI_TYPE_SLOW_HASH_SIMD_COMP = (1 << 15), - OPTI_TYPE_USES_BITS_8 = (1 << 16), - OPTI_TYPE_USES_BITS_16 = (1 << 17), - OPTI_TYPE_USES_BITS_32 = (1 << 18), - OPTI_TYPE_USES_BITS_64 = (1 << 19), - OPTI_TYPE_REGISTER_LIMIT = (1 << 20), // We'll limit the register count to 128 + OPTI_TYPE_OPTIMIZED_KERNEL = (1 << 0), + OPTI_TYPE_ZERO_BYTE = (1 << 1), + OPTI_TYPE_PRECOMPUTE_INIT = (1 << 2), + OPTI_TYPE_MEET_IN_MIDDLE = (1 << 3), + OPTI_TYPE_EARLY_SKIP = (1 << 4), + OPTI_TYPE_NOT_SALTED = (1 << 5), + OPTI_TYPE_NOT_ITERATED = (1 << 6), + OPTI_TYPE_PREPENDED_SALT = (1 << 7), + OPTI_TYPE_APPENDED_SALT = (1 << 8), + OPTI_TYPE_SINGLE_HASH = (1 << 9), + OPTI_TYPE_SINGLE_SALT = (1 << 10), + OPTI_TYPE_BRUTE_FORCE = (1 << 11), + OPTI_TYPE_RAW_HASH = (1 << 12), + OPTI_TYPE_SLOW_HASH_SIMD_INIT = (1 << 13), + OPTI_TYPE_SLOW_HASH_SIMD_LOOP = (1 << 14), + OPTI_TYPE_SLOW_HASH_SIMD_COMP = (1 << 15), + OPTI_TYPE_USES_BITS_8 = (1 << 16), + OPTI_TYPE_USES_BITS_16 = (1 << 17), + OPTI_TYPE_USES_BITS_32 = (1 << 18), + OPTI_TYPE_USES_BITS_64 = (1 << 19), + OPTI_TYPE_REGISTER_LIMIT = (1 << 20), // We'll limit the register count to 128 + OPTI_TYPE_SLOW_HASH_SIMD_INIT2 = (1 << 21), + OPTI_TYPE_SLOW_HASH_SIMD_LOOP2 = (1 << 22), } opti_type_t; diff --git a/src/Makefile b/src/Makefile index e4d792e19..8d97e7297 100644 --- a/src/Makefile +++ b/src/Makefile @@ -106,7 +106,7 @@ LIBRARY_DEV_ROOT_FOLDER ?= $(PREFIX)/include LIBRARY_DEV_FOLDER ?= $(LIBRARY_DEV_ROOT_FOLDER)/hashcat ## -## Depencies paths +## Dependencies paths ## ifeq ($(USE_SYSTEM_LZMA),0) diff --git a/src/backend.c b/src/backend.c index 58aa8094f..894eb8746 100644 --- a/src/backend.c +++ b/src/backend.c @@ -3596,6 +3596,20 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con num_elements = CEILDIV (num_elements, device_param->vector_width); } } + else if (kern_run == KERN_RUN_INIT2) + { + if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_INIT2) + { + num_elements = CEILDIV (num_elements, device_param->vector_width); + } + } + else if (kern_run == KERN_RUN_LOOP2) + { + if (hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_SIMD_LOOP2) + { + num_elements = CEILDIV (num_elements, device_param->vector_width); + } + } if (hc_cuEventRecord (hashcat_ctx, device_param->cuda_event1, device_param->cuda_stream) == -1) return -1; @@ -5540,7 +5554,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) device_param->skipped = true; } - // some attributes have to be hardcoded because they are used for instance in the build options + // some attributes have to be hardcoded values because they are used for instance in the build options device_param->device_local_mem_type = CL_LOCAL; device_param->opencl_device_type = CL_DEVICE_TYPE_GPU; @@ -5616,11 +5630,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) cuda_devices_active++; } - CUcontext cuda_context; - - if (hc_cuCtxCreate (hashcat_ctx, &cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, device_param->cuda_device) == -1) return -1; - - if (hc_cuCtxSetCurrent (hashcat_ctx, cuda_context) == -1) return -1; + // instruction set // bcrypt optimization? //const int rc_cuCtxSetCacheConfig = hc_cuCtxSetCacheConfig (hashcat_ctx, CU_FUNC_CACHE_PREFER_SHARED); @@ -5638,46 +5648,13 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) device_param->has_mov64 = (sm >= 10) ? true : false; device_param->has_prmt = (sm >= 20) ? true : false; - /* - #define RUN_INSTRUCTION_CHECKS() \ - device_param->has_add = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"add.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_addc = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"addc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_sub = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"sub.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_subc = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"subc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_bfe = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_lop3 = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"lop3.b32 %0, 0, 0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_mov64 = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned long long r; unsigned int a; unsigned int b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }"); \ - device_param->has_prmt = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"prmt.b32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \ - - if (backend_devices_idx > 0) - { - hc_device_param_t *device_param_prev = &devices_param[backend_devices_idx - 1]; - - if (is_same_device_type (device_param, device_param_prev) == true) - { - device_param->has_add = device_param_prev->has_add; - device_param->has_addc = device_param_prev->has_addc; - device_param->has_sub = device_param_prev->has_sub; - device_param->has_subc = device_param_prev->has_subc; - device_param->has_bfe = device_param_prev->has_bfe; - device_param->has_lop3 = device_param_prev->has_lop3; - device_param->has_mov64 = device_param_prev->has_mov64; - device_param->has_prmt = device_param_prev->has_prmt; - } - else - { - RUN_INSTRUCTION_CHECKS(); - } - } - else - { - RUN_INSTRUCTION_CHECKS(); - } + // device_available_mem - #undef RUN_INSTRUCTION_CHECKS - */ + CUcontext cuda_context; - // device_available_mem + if (hc_cuCtxCreate (hashcat_ctx, &cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, device_param->cuda_device) == -1) return -1; + + if (hc_cuCtxSetCurrent (hashcat_ctx, cuda_context) == -1) return -1; size_t free = 0; size_t total = 0; @@ -6269,6 +6246,25 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) } } + // instruction set + + // fixed values works only for nvidia devices + // dynamical values for amd see time intensive section below + + if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_NV)) + { + const int sm = (device_param->sm_major * 10) + device_param->sm_minor; + + device_param->has_add = (sm >= 12) ? true : false; + device_param->has_addc = (sm >= 12) ? true : false; + device_param->has_sub = (sm >= 12) ? true : false; + device_param->has_subc = (sm >= 12) ? true : false; + device_param->has_bfe = (sm >= 20) ? true : false; + device_param->has_lop3 = (sm >= 50) ? true : false; + device_param->has_mov64 = (sm >= 10) ? true : false; + device_param->has_prmt = (sm >= 20) ? true : false; + } + // common driver check if (device_param->skipped == false) @@ -6432,272 +6428,336 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) opencl_devices_active++; } + } + } + } - /** - * create context for each device - */ + backend_ctx->opencl_devices_cnt = opencl_devices_cnt; + backend_ctx->opencl_devices_active = opencl_devices_active; - cl_context context; + // all devices combined go into backend_* variables - /* - cl_context_properties properties[3]; + backend_ctx->backend_devices_cnt = cuda_devices_cnt + opencl_devices_cnt; + backend_ctx->backend_devices_active = cuda_devices_active + opencl_devices_active; - properties[0] = CL_CONTEXT_PLATFORM; - properties[1] = (cl_context_properties) device_param->opencl_platform; - properties[2] = 0; + // find duplicate devices - CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->opencl_device, NULL, NULL, &context); - */ + //if ((cuda_devices_cnt > 0) && (opencl_devices_cnt > 0)) + //{ + // using force here enables both devices, which is the worst possible outcome + // many users force by default, so this is not a good idea - if (hc_clCreateContext (hashcat_ctx, NULL, 1, &device_param->opencl_device, NULL, NULL, &context) == -1) return -1; + //if (user_options->force == false) + //{ + backend_ctx_find_alias_devices (hashcat_ctx); + //{ + //} - /** - * create command-queue - */ + if (backend_ctx->backend_devices_active == 0) + { + event_log_error (hashcat_ctx, "No devices found/left."); - cl_command_queue command_queue; + return -1; + } - if (hc_clCreateCommandQueue (hashcat_ctx, context, device_param->opencl_device, 0, &command_queue) == -1) return -1; + // now we can calculate the number of parallel running hook threads based on + // the number cpu cores and the number of active compute devices + // unless overwritten by the user - if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD)) - { - #define RUN_INSTRUCTION_CHECKS() - device_param->has_vadd = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \ - device_param->has_vaddc = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADDC_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \ - device_param->has_vadd_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD_CO_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \ - device_param->has_vaddc_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADDC_CO_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \ - device_param->has_vsub = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUB_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \ - device_param->has_vsubb = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUBB_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \ - device_param->has_vsub_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUB_CO_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \ - device_param->has_vsubb_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUBB_CO_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \ - device_param->has_vadd3 = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD3_U32 %0, 0, 0, 0;\" : \"=v\"(r1)); }"); \ - device_param->has_vbfe = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_BFE_U32 %0, 0, 0, 0;\" : \"=v\"(r1)); }"); \ - device_param->has_vperm = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_PERM_B32 %0, 0, 0, 0;\" : \"=v\"(r1)); }"); \ + if (user_options->hook_threads == HOOK_THREADS) + { + const u32 processor_count = hc_get_processor_count (); - if (backend_devices_idx > 0) - { - hc_device_param_t *device_param_prev = &devices_param[backend_devices_idx - 1]; + const u32 processor_count_cu = CEILDIV (processor_count, backend_ctx->backend_devices_active); // should never reach 0 - if (is_same_device_type (device_param, device_param_prev) == true) - { - device_param->has_vadd = device_param_prev->has_vadd; - device_param->has_vaddc = device_param_prev->has_vaddc; - device_param->has_vadd_co = device_param_prev->has_vadd_co; - device_param->has_vaddc_co = device_param_prev->has_vaddc_co; - device_param->has_vsub = device_param_prev->has_vsub; - device_param->has_vsubb = device_param_prev->has_vsubb; - device_param->has_vsub_co = device_param_prev->has_vsub_co; - device_param->has_vsubb_co = device_param_prev->has_vsubb_co; - device_param->has_vadd3 = device_param_prev->has_vadd3; - device_param->has_vbfe = device_param_prev->has_vbfe; - device_param->has_vperm = device_param_prev->has_vperm; - } - else - { - RUN_INSTRUCTION_CHECKS(); - } - } - else - { - RUN_INSTRUCTION_CHECKS(); - } + user_options->hook_threads = processor_count_cu; + } - #undef RUN_INSTRUCTION_CHECKS - } + // additional check to see if the user has chosen a device that is not within the range of available devices (i.e. larger than devices_cnt) - if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_NV)) - { - const int sm = (device_param->sm_major * 10) + device_param->sm_minor; + if (backend_ctx->backend_devices_filter != (u64) -1) + { + const u64 backend_devices_cnt_mask = ~(((u64) -1 >> backend_ctx->backend_devices_cnt) << backend_ctx->backend_devices_cnt); - device_param->has_add = (sm >= 12) ? true : false; - device_param->has_addc = (sm >= 12) ? true : false; - device_param->has_sub = (sm >= 12) ? true : false; - device_param->has_subc = (sm >= 12) ? true : false; - device_param->has_bfe = (sm >= 20) ? true : false; - device_param->has_lop3 = (sm >= 50) ? true : false; - device_param->has_mov64 = (sm >= 10) ? true : false; - device_param->has_prmt = (sm >= 20) ? true : false; + if (backend_ctx->backend_devices_filter > backend_devices_cnt_mask) + { + event_log_error (hashcat_ctx, "An invalid device was specified using the --backend-devices parameter."); + event_log_error (hashcat_ctx, "The specified device was higher than the number of available devices (%u).", backend_ctx->backend_devices_cnt); - /* - #define RUN_INSTRUCTION_CHECKS() \ - device_param->has_add = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"add.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_addc = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"addc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_sub = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"sub.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_subc = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"subc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_bfe = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_lop3 = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"lop3.b32 %0, 0, 0, 0, 0;\" : \"=r\"(r)); }"); \ - device_param->has_mov64 = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { ulong r; uint a; uint b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }"); \ - device_param->has_prmt = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"prmt.b32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \ - - if (backend_devices_idx > 0) - { - hc_device_param_t *device_param_prev = &devices_param[backend_devices_idx - 1]; + return -1; + } + } - if (is_same_device_type (device_param, device_param_prev) == true) - { - device_param->has_add = device_param_prev->has_add; - device_param->has_addc = device_param_prev->has_addc; - device_param->has_sub = device_param_prev->has_sub; - device_param->has_subc = device_param_prev->has_subc; - device_param->has_bfe = device_param_prev->has_bfe; - device_param->has_lop3 = device_param_prev->has_lop3; - device_param->has_mov64 = device_param_prev->has_mov64; - device_param->has_prmt = device_param_prev->has_prmt; - } - else - { - RUN_INSTRUCTION_CHECKS(); - } - } - else - { - RUN_INSTRUCTION_CHECKS(); - } + // time or resource intensive operations which we do not run if the corresponding device was skipped by the user - #undef RUN_INSTRUCTION_CHECKS - */ - } + if (backend_ctx->cuda) + { + // instruction test for cuda devices was replaced with fixed values (see above) - // device_available_mem + /* + CUcontext cuda_context; - #define MAX_ALLOC_CHECKS_CNT 8192 - #define MAX_ALLOC_CHECKS_SIZE (64 * 1024 * 1024) + if (hc_cuCtxCreate (hashcat_ctx, &cuda_context, CU_CTX_SCHED_BLOCKING_SYNC, device_param->cuda_device) == -1) return -1; - device_param->device_available_mem = device_param->device_global_mem - MAX_ALLOC_CHECKS_SIZE; + if (hc_cuCtxSetCurrent (hashcat_ctx, cuda_context) == -1) return -1; - #if defined (_WIN) - if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_NV)) - #else - if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && ((device_param->opencl_platform_vendor_id == VENDOR_ID_NV) || (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD))) - #endif - { - // OK, so the problem here is the following: - // There's just CL_DEVICE_GLOBAL_MEM_SIZE to ask OpenCL about the total memory on the device, - // but there's no way to ask for available memory on the device. - // In combination, most OpenCL runtimes implementation of clCreateBuffer() - // are doing so called lazy memory allocation on the device. - // Now, if the user has X11 (or a game or anything that takes a lot of GPU memory) - // running on the host we end up with an error type of this: - // clEnqueueNDRangeKernel(): CL_MEM_OBJECT_ALLOCATION_FAILURE - // The clEnqueueNDRangeKernel() is because of the lazy allocation - // The best way to workaround this problem is if we would be able to ask for available memory, - // The idea here is to try to evaluate available memory by allocating it till it errors + #define RUN_INSTRUCTION_CHECKS() \ + device_param->has_add = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"add.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_addc = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"addc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_sub = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"sub.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_subc = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"subc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_bfe = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_lop3 = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"lop3.b32 %0, 0, 0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_mov64 = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned long long r; unsigned int a; unsigned int b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }"); \ + device_param->has_prmt = cuda_test_instruction (hashcat_ctx, sm_major, sm_minor, "__global__ void test () { unsigned int r; asm volatile (\"prmt.b32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \ - cl_mem *tmp_device = (cl_mem *) hccalloc (MAX_ALLOC_CHECKS_CNT, sizeof (cl_mem)); + if (backend_devices_idx > 0) + { + hc_device_param_t *device_param_prev = &devices_param[backend_devices_idx - 1]; - u64 c; + if (is_same_device_type (device_param, device_param_prev) == true) + { + device_param->has_add = device_param_prev->has_add; + device_param->has_addc = device_param_prev->has_addc; + device_param->has_sub = device_param_prev->has_sub; + device_param->has_subc = device_param_prev->has_subc; + device_param->has_bfe = device_param_prev->has_bfe; + device_param->has_lop3 = device_param_prev->has_lop3; + device_param->has_mov64 = device_param_prev->has_mov64; + device_param->has_prmt = device_param_prev->has_prmt; + } + else + { + RUN_INSTRUCTION_CHECKS(); + } + } + else + { + RUN_INSTRUCTION_CHECKS(); + } - for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++) - { - if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break; + #undef RUN_INSTRUCTION_CHECKS - cl_int CL_err; + if (hc_cuCtxDestroy (hashcat_ctx, cuda_context) == -1) return -1; - OCL_PTR *ocl = (OCL_PTR *) backend_ctx->ocl; + */ + } - tmp_device[c] = ocl->clCreateBuffer (context, CL_MEM_READ_WRITE, MAX_ALLOC_CHECKS_SIZE, NULL, &CL_err); + if (backend_ctx->ocl) + { + for (int backend_devices_cnt = 0; backend_devices_cnt < backend_ctx->backend_devices_cnt; backend_devices_cnt++) + { + hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_cnt]; - if (CL_err != CL_SUCCESS) - { - c--; + if (device_param->is_opencl == false) continue; - break; - } + if (device_param->skipped == true) continue; - // transfer only a few byte should be enough to force the runtime to actually allocate the memory + /** + * create context for each device + */ - u8 tmp_host[8]; + cl_context context; - if (ocl->clEnqueueReadBuffer (command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break; + /* + cl_context_properties properties[3]; - if (ocl->clEnqueueWriteBuffer (command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break; + properties[0] = CL_CONTEXT_PLATFORM; + properties[1] = (cl_context_properties) device_param->opencl_platform; + properties[2] = 0; - if (ocl->clEnqueueReadBuffer (command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break; + CL_rc = hc_clCreateContext (hashcat_ctx, properties, 1, &device_param->opencl_device, NULL, NULL, &context); + */ - if (ocl->clEnqueueWriteBuffer (command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break; - } + if (hc_clCreateContext (hashcat_ctx, NULL, 1, &device_param->opencl_device, NULL, NULL, &context) == -1) return -1; + + /** + * create command-queue + */ - device_param->device_available_mem = MAX_ALLOC_CHECKS_SIZE; - if (c > 0) + cl_command_queue command_queue; + + if (hc_clCreateCommandQueue (hashcat_ctx, context, device_param->opencl_device, 0, &command_queue) == -1) return -1; + + // instruction set + + if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD)) + { + #define RUN_INSTRUCTION_CHECKS() + device_param->has_vadd = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \ + device_param->has_vaddc = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADDC_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \ + device_param->has_vadd_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD_CO_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \ + device_param->has_vaddc_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADDC_CO_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \ + device_param->has_vsub = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUB_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \ + device_param->has_vsubb = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUBB_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \ + device_param->has_vsub_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUB_CO_U32 %0, vcc, 0, 0;\" : \"=v\"(r1)); }"); \ + device_param->has_vsubb_co = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_SUBB_CO_U32 %0, vcc, 0, 0, vcc;\" : \"=v\"(r1)); }"); \ + device_param->has_vadd3 = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_ADD3_U32 %0, 0, 0, 0;\" : \"=v\"(r1)); }"); \ + device_param->has_vbfe = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_BFE_U32 %0, 0, 0, 0;\" : \"=v\"(r1)); }"); \ + device_param->has_vperm = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r1; __asm__ __volatile__ (\"V_PERM_B32 %0, 0, 0, 0;\" : \"=v\"(r1)); }"); \ + + if (backend_devices_idx > 0) + { + hc_device_param_t *device_param_prev = &devices_param[backend_devices_idx - 1]; + + if (is_same_device_type (device_param, device_param_prev) == true) { - device_param->device_available_mem *= c; + device_param->has_vadd = device_param_prev->has_vadd; + device_param->has_vaddc = device_param_prev->has_vaddc; + device_param->has_vadd_co = device_param_prev->has_vadd_co; + device_param->has_vaddc_co = device_param_prev->has_vaddc_co; + device_param->has_vsub = device_param_prev->has_vsub; + device_param->has_vsubb = device_param_prev->has_vsubb; + device_param->has_vsub_co = device_param_prev->has_vsub_co; + device_param->has_vsubb_co = device_param_prev->has_vsubb_co; + device_param->has_vadd3 = device_param_prev->has_vadd3; + device_param->has_vbfe = device_param_prev->has_vbfe; + device_param->has_vperm = device_param_prev->has_vperm; } + else + { + RUN_INSTRUCTION_CHECKS(); + } + } + else + { + RUN_INSTRUCTION_CHECKS(); + } + + #undef RUN_INSTRUCTION_CHECKS + } - // clean up + if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_NV)) + { + // replaced with fixed values see non time intensive section above - for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++) - { - if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break; + /* + #define RUN_INSTRUCTION_CHECKS() \ + device_param->has_add = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"add.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_addc = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"addc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_sub = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"sub.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_subc = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"subc.cc.u32 %0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_bfe = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"bfe.u32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_lop3 = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"lop3.b32 %0, 0, 0, 0, 0;\" : \"=r\"(r)); }"); \ + device_param->has_mov64 = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { ulong r; uint a; uint b; asm volatile (\"mov.b64 %0, {%1, %2};\" : \"=l\"(r) : \"r\"(a), \"r\"(b)); }"); \ + device_param->has_prmt = opencl_test_instruction (hashcat_ctx, context, device_param->opencl_device, "__kernel void test () { uint r; asm volatile (\"prmt.b32 %0, 0, 0, 0;\" : \"=r\"(r)); }"); \ - if (tmp_device[c] != NULL) - { - if (hc_clReleaseMemObject (hashcat_ctx, tmp_device[c]) == -1) return -1; - } - } + if (backend_devices_idx > 0) + { + hc_device_param_t *device_param_prev = &devices_param[backend_devices_idx - 1]; - hcfree (tmp_device); + if (is_same_device_type (device_param, device_param_prev) == true) + { + device_param->has_add = device_param_prev->has_add; + device_param->has_addc = device_param_prev->has_addc; + device_param->has_sub = device_param_prev->has_sub; + device_param->has_subc = device_param_prev->has_subc; + device_param->has_bfe = device_param_prev->has_bfe; + device_param->has_lop3 = device_param_prev->has_lop3; + device_param->has_mov64 = device_param_prev->has_mov64; + device_param->has_prmt = device_param_prev->has_prmt; + } + else + { + RUN_INSTRUCTION_CHECKS(); + } + } + else + { + RUN_INSTRUCTION_CHECKS(); } - hc_clReleaseCommandQueue (hashcat_ctx, command_queue); - - hc_clReleaseContext (hashcat_ctx, context); + #undef RUN_INSTRUCTION_CHECKS + */ } - } - } - backend_ctx->opencl_devices_cnt = opencl_devices_cnt; - backend_ctx->opencl_devices_active = opencl_devices_active; + // available device memory + // This test causes an GPU memory usage spike. + // In case there are multiple hashcat instances starting at the same time this will cause GPU out of memory errors which otherwise would not exist. + // We will simply not run it if that device was skipped by the user. - // all devices combined go into backend_* variables + #define MAX_ALLOC_CHECKS_CNT 8192 + #define MAX_ALLOC_CHECKS_SIZE (64 * 1024 * 1024) - backend_ctx->backend_devices_cnt = cuda_devices_cnt + opencl_devices_cnt; - backend_ctx->backend_devices_active = cuda_devices_active + opencl_devices_active; + device_param->device_available_mem = device_param->device_global_mem - MAX_ALLOC_CHECKS_SIZE; - // find duplicate devices + #if defined (_WIN) + if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_NV)) + #else + if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && ((device_param->opencl_platform_vendor_id == VENDOR_ID_NV) || (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD))) + #endif + { + // OK, so the problem here is the following: + // There's just CL_DEVICE_GLOBAL_MEM_SIZE to ask OpenCL about the total memory on the device, + // but there's no way to ask for available memory on the device. + // In combination, most OpenCL runtimes implementation of clCreateBuffer() + // are doing so called lazy memory allocation on the device. + // Now, if the user has X11 (or a game or anything that takes a lot of GPU memory) + // running on the host we end up with an error type of this: + // clEnqueueNDRangeKernel(): CL_MEM_OBJECT_ALLOCATION_FAILURE + // The clEnqueueNDRangeKernel() is because of the lazy allocation + // The best way to workaround this problem is if we would be able to ask for available memory, + // The idea here is to try to evaluate available memory by allocating it till it errors - //if ((cuda_devices_cnt > 0) && (opencl_devices_cnt > 0)) - //{ - // using force here enables both devices, which is the worst possible outcome - // many users force by default, so this is not a good idea + cl_mem *tmp_device = (cl_mem *) hccalloc (MAX_ALLOC_CHECKS_CNT, sizeof (cl_mem)); - //if (user_options->force == false) - //{ - backend_ctx_find_alias_devices (hashcat_ctx); - //{ - //} + u64 c; - if (backend_ctx->backend_devices_active == 0) - { - event_log_error (hashcat_ctx, "No devices found/left."); + for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++) + { + if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break; - return -1; - } + cl_int CL_err; - // now we can calculate the number of parallel running hook threads based on - // the number cpu cores and the number of active compute devices - // unless overwritten by the user + OCL_PTR *ocl = (OCL_PTR *) backend_ctx->ocl; - if (user_options->hook_threads == HOOK_THREADS) - { - const u32 processor_count = hc_get_processor_count (); + tmp_device[c] = ocl->clCreateBuffer (context, CL_MEM_READ_WRITE, MAX_ALLOC_CHECKS_SIZE, NULL, &CL_err); - const u32 processor_count_cu = CEILDIV (processor_count, backend_ctx->backend_devices_active); // should never reach 0 + if (CL_err != CL_SUCCESS) + { + c--; - user_options->hook_threads = processor_count_cu; - } + break; + } - // additional check to see if the user has chosen a device that is not within the range of available devices (i.e. larger than devices_cnt) + // transfer only a few byte should be enough to force the runtime to actually allocate the memory - if (backend_ctx->backend_devices_filter != (u64) -1) - { - const u64 backend_devices_cnt_mask = ~(((u64) -1 >> backend_ctx->backend_devices_cnt) << backend_ctx->backend_devices_cnt); + u8 tmp_host[8]; - if (backend_ctx->backend_devices_filter > backend_devices_cnt_mask) - { - event_log_error (hashcat_ctx, "An invalid device was specified using the --backend-devices parameter."); - event_log_error (hashcat_ctx, "The specified device was higher than the number of available devices (%u).", backend_ctx->backend_devices_cnt); + if (ocl->clEnqueueReadBuffer (command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break; - return -1; + if (ocl->clEnqueueWriteBuffer (command_queue, tmp_device[c], CL_TRUE, 0, sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break; + + if (ocl->clEnqueueReadBuffer (command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break; + + if (ocl->clEnqueueWriteBuffer (command_queue, tmp_device[c], CL_TRUE, MAX_ALLOC_CHECKS_SIZE - sizeof (tmp_host), sizeof (tmp_host), tmp_host, 0, NULL, NULL) != CL_SUCCESS) break; + } + + device_param->device_available_mem = MAX_ALLOC_CHECKS_SIZE; + + if (c > 0) + { + device_param->device_available_mem *= c; + } + + // clean up + + for (c = 0; c < MAX_ALLOC_CHECKS_CNT; c++) + { + if (((c + 1 + 1) * MAX_ALLOC_CHECKS_SIZE) >= device_param->device_global_mem) break; + + if (tmp_device[c] != NULL) + { + if (hc_clReleaseMemObject (hashcat_ctx, tmp_device[c]) == -1) return -1; + } + } + + hcfree (tmp_device); + } + + hc_clReleaseCommandQueue (hashcat_ctx, command_queue); + + hc_clReleaseContext (hashcat_ctx, context); } } diff --git a/src/modules/module_11300.c b/src/modules/module_11300.c index 044383bf0..bf9833fc3 100644 --- a/src/modules/module_11300.c +++ b/src/modules/module_11300.c @@ -26,7 +26,8 @@ static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE | OPTS_TYPE_ST_HEX | OPTS_TYPE_ST_ADD80 - | OPTS_TYPE_HASH_COPY; + | OPTS_TYPE_HASH_COPY + | OPTS_TYPE_DEEP_COMP_KERNEL; static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; static const char *ST_PASS = "hashcat"; static const char *ST_HASH = "$bitcoin$96$c265931309b4a59307921cf054b4ec6b6e4554369be79802e94e16477645777d948ae1d375191831efc78e5acd1f0443$16$8017214013543185$200460$96$480008005625057442352316337722323437108374245623701184230273883222762730232857701607167815448714$66$014754433300175043011633205413774877455616682000536368706315333388"; @@ -100,6 +101,11 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY return jit_build_options; } +u32 module_deep_comp_kernel (MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const u32 salt_pos, MAYBE_UNUSED const u32 digest_pos) +{ + return KERN_RUN_3; +} + u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { const u64 esalt_size = (const u64) sizeof (bitcoin_wallet_t); @@ -283,7 +289,7 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_benchmark_mask = MODULE_DEFAULT; module_ctx->module_benchmark_salt = MODULE_DEFAULT; module_ctx->module_build_plain_postprocess = MODULE_DEFAULT; - module_ctx->module_deep_comp_kernel = MODULE_DEFAULT; + module_ctx->module_deep_comp_kernel = module_deep_comp_kernel; module_ctx->module_dgst_pos0 = module_dgst_pos0; module_ctx->module_dgst_pos1 = module_dgst_pos1; module_ctx->module_dgst_pos2 = module_dgst_pos2; diff --git a/src/modules/module_13200.c b/src/modules/module_13200.c index 62641dde1..e82bc01a2 100644 --- a/src/modules/module_13200.c +++ b/src/modules/module_13200.c @@ -17,7 +17,7 @@ static const u32 DGST_POS2 = 2; static const u32 DGST_POS3 = 3; static const u32 DGST_SIZE = DGST_SIZE_4_4; static const u32 HASH_CATEGORY = HASH_CATEGORY_ARCHIVE; -static const char *HASH_NAME = "AxCrypt"; +static const char *HASH_NAME = "AxCrypt 1"; static const u64 KERN_TYPE = 13200; static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE; static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE; diff --git a/src/modules/module_13300.c b/src/modules/module_13300.c index 3a24a8e17..9fae83b87 100644 --- a/src/modules/module_13300.c +++ b/src/modules/module_13300.c @@ -17,7 +17,7 @@ static const u32 DGST_POS2 = 3; static const u32 DGST_POS3 = 2; static const u32 DGST_SIZE = DGST_SIZE_4_5; static const u32 HASH_CATEGORY = HASH_CATEGORY_ARCHIVE; -static const char *HASH_NAME = "AxCrypt in-memory SHA1"; +static const char *HASH_NAME = "AxCrypt 1 in-memory SHA1"; static const u64 KERN_TYPE = 13300; static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_PRECOMPUTE_INIT diff --git a/src/modules/module_14800.c b/src/modules/module_14800.c index 26926f661..99e634848 100644 --- a/src/modules/module_14800.c +++ b/src/modules/module_14800.c @@ -21,7 +21,8 @@ static const u32 HASH_CATEGORY = HASH_CATEGORY_ARCHIVE; static const char *HASH_NAME = "iTunes backup >= 10.0"; static const u64 KERN_TYPE = 14800; static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE - | OPTI_TYPE_SLOW_HASH_SIMD_LOOP; + | OPTI_TYPE_SLOW_HASH_SIMD_LOOP + | OPTI_TYPE_SLOW_HASH_SIMD_LOOP2; static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE | OPTS_TYPE_ST_HEX | OPTS_TYPE_INIT2 diff --git a/src/modules/module_23300.c b/src/modules/module_23300.c new file mode 100644 index 000000000..7b085db3a --- /dev/null +++ b/src/modules/module_23300.c @@ -0,0 +1,327 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#include "common.h" +#include "types.h" +#include "modules.h" +#include "bitops.h" +#include "convert.h" +#include "shared.h" + +static const u32 ATTACK_EXEC = ATTACK_EXEC_OUTSIDE_KERNEL; +static const u32 DGST_POS0 = 0; +static const u32 DGST_POS1 = 1; +static const u32 DGST_POS2 = 2; +static const u32 DGST_POS3 = 3; +static const u32 DGST_SIZE = DGST_SIZE_4_4; +static const u32 HASH_CATEGORY = HASH_CATEGORY_DOCUMENTS; +static const char *HASH_NAME = "Apple iWork"; +static const u64 KERN_TYPE = 23300; +static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE + | OPTI_TYPE_SLOW_HASH_SIMD_LOOP; +static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE + | OPTS_TYPE_ST_HEX; +static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; +static const char *ST_PASS = "hashcat"; +static const char *ST_HASH = "$iwork$2$1$1$4000$b31b7320d1e7a5ee$01f54d6f9e5090eb16fef2b05f8242bc$69561c985268326b7353fb22c3685a378341127557bd2bbea1bd10afb31f2127344707b662a2c29480c32b8b93dea0538327f604e5aa8733be83af25f370f7ac"; + +u32 module_attack_exec (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC; } +u32 module_dgst_pos0 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0; } +u32 module_dgst_pos1 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS1; } +u32 module_dgst_pos2 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS2; } +u32 module_dgst_pos3 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS3; } +u32 module_dgst_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_SIZE; } +u32 module_hash_category (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_CATEGORY; } +const char *module_hash_name (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_NAME; } +u64 module_kern_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return KERN_TYPE; } +u32 module_opti_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTI_TYPE; } +u64 module_opts_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTS_TYPE; } +u32 module_salt_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return SALT_TYPE; } +const char *module_st_hash (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH; } +const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS; } + +typedef struct iwork_tmp +{ + u32 ipad[5]; + u32 opad[5]; + + u32 dgst[5]; + u32 out[5]; + +} iwork_tmp_t; + +typedef struct iwork +{ + u32 iv[4]; + u32 data[16]; + +} iwork_t; + +static const char *SIGNATURE_IWORK = "$iwork$"; +static const u32 FORMAT_NUM_IWORK = 1; + +u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u64 tmp_size = (const u64) sizeof (iwork_tmp_t); + + return tmp_size; +} + +u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u64 esalt_size = (const u64) sizeof (iwork_t); + + return esalt_size; +} + +u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + // this overrides the reductions of PW_MAX in case optimized kernel is selected + // IOW, even in optimized kernel mode it support length 256 + + const u32 pw_max = PW_MAX; + + return pw_max; +} + +int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len) +{ + u32 *digest = (u32 *) digest_buf; + + iwork_t *iwork = (iwork_t *) esalt_buf; + + token_t token; + + token.token_cnt = 8; + + token.signatures_cnt = 1; + token.signatures_buf[0] = SIGNATURE_IWORK; + + token.len[0] = 7; + token.attr[0] = TOKEN_ATTR_FIXED_LENGTH + | TOKEN_ATTR_VERIFY_SIGNATURE; + + token.len_min[1] = 1; + token.len_max[1] = 1; + token.sep[1] = '$'; + token.attr[1] = TOKEN_ATTR_VERIFY_LENGTH + | TOKEN_ATTR_VERIFY_DIGIT; + + token.len_min[2] = 1; + token.len_max[2] = 1; + token.sep[2] = '$'; + token.attr[2] = TOKEN_ATTR_VERIFY_LENGTH + | TOKEN_ATTR_VERIFY_DIGIT; + + token.len_min[3] = 1; + token.len_max[3] = 1; + token.sep[3] = '$'; + token.attr[3] = TOKEN_ATTR_VERIFY_LENGTH + | TOKEN_ATTR_VERIFY_DIGIT; + + token.len_min[4] = 4; + token.len_max[4] = 6; + token.sep[4] = '$'; + token.attr[4] = TOKEN_ATTR_VERIFY_LENGTH + | TOKEN_ATTR_VERIFY_DIGIT; + + token.len_min[5] = 16; + token.len_max[5] = 32; + token.sep[5] = '$'; + token.attr[5] = TOKEN_ATTR_VERIFY_LENGTH + | TOKEN_ATTR_VERIFY_HEX; + + token.len_min[6] = 32; + token.len_max[6] = 32; + token.sep[6] = '$'; + token.attr[6] = TOKEN_ATTR_VERIFY_LENGTH + | TOKEN_ATTR_VERIFY_HEX; + + token.len[7] = 128; + token.attr[7] = TOKEN_ATTR_FIXED_LENGTH + | TOKEN_ATTR_VERIFY_HEX; + + const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token); + + if (rc_tokenizer != PARSER_OK) return (rc_tokenizer); + + const u8 *hash_ver_pos = token.buf[1]; + const u8 *file_ver_pos = token.buf[2]; + const u8 *format_ver_pos = token.buf[3]; + + const u32 hash_ver = hc_strtoul ((const char *) hash_ver_pos, NULL, 10); + const u32 file_ver = hc_strtoul ((const char *) file_ver_pos, NULL, 10); + const u32 format_ver = hc_strtoul ((const char *) format_ver_pos, NULL, 10); + + if (format_ver != FORMAT_NUM_IWORK) return (PARSER_SALT_VALUE); + + if ((hash_ver != 1) && (hash_ver != 2)) return (PARSER_SALT_VALUE); + if ((file_ver != 1) && (file_ver != 2)) return (PARSER_SALT_VALUE); + + salt->salt_sign[0] = hash_ver; + salt->salt_sign[1] = file_ver; + + const u8 *iter_pos = token.buf[4]; + + const u32 iterations = hc_strtoul ((const char *) iter_pos, NULL, 10); + + if (iterations < 1000) return (PARSER_SALT_ITERATION); + if (iterations > 999999) return (PARSER_SALT_ITERATION); + + salt->salt_iter = iterations - 1; + + // salt + + const u8 *salt_pos = token.buf[5]; + const int salt_len = token.len[5]; + + const bool parse_rc = generic_salt_decode (hashconfig, salt_pos, salt_len, (u8 *) salt->salt_buf, (int *) &salt->salt_len); + + salt->salt_buf[0] = byte_swap_32 (salt->salt_buf[0]); + salt->salt_buf[1] = byte_swap_32 (salt->salt_buf[1]); + salt->salt_buf[2] = byte_swap_32 (salt->salt_buf[2]); + salt->salt_buf[3] = byte_swap_32 (salt->salt_buf[3]); + + if (parse_rc == false) return (PARSER_SALT_LENGTH); + + // IV + + const u8 *iv_pos = token.buf[6]; + const int iv_len = token.len[6]; + + hex_decode (iv_pos, iv_len, (u8 *) iwork->iv); + + // data + + const u8 *data_pos = token.buf[7]; + const int data_len = token.len[7]; + + hex_decode (data_pos, data_len, (u8 *) iwork->data); + + // fake digest + + digest[0] = iwork->data[0]; + digest[1] = iwork->data[1]; + digest[2] = iwork->data[2]; + digest[3] = iwork->data[3]; + + return (PARSER_OK); +} + +int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const void *digest_buf, MAYBE_UNUSED const salt_t *salt, MAYBE_UNUSED const void *esalt_buf, MAYBE_UNUSED const void *hook_salt_buf, MAYBE_UNUSED const hashinfo_t *hash_info, char *line_buf, MAYBE_UNUSED const int line_size) +{ + const iwork_t *iwork = (const iwork_t *) esalt_buf; + + // salt + + u32 tmp_salt[4] = { 0 }; + + tmp_salt[0] = byte_swap_32 (salt->salt_buf[0]); + tmp_salt[1] = byte_swap_32 (salt->salt_buf[1]); + tmp_salt[2] = byte_swap_32 (salt->salt_buf[2]); + tmp_salt[3] = byte_swap_32 (salt->salt_buf[3]); + + char salt_hex[33] = { 0 }; + + generic_salt_encode (hashconfig, (const u8 *) tmp_salt, (const int) salt->salt_len, (u8 *) salt_hex); + + // iv + + u8 iv_hex[33] = { 0 }; + + hex_encode ((u8 *) iwork->iv, 16, iv_hex); + + // data + + u8 data_hex[129] = { 0 }; + + hex_encode ((u8 *) iwork->data, 64, data_hex); + + int out_len = snprintf (line_buf, line_size, "%s%u$%u$%u$%u$%s$%s$%s", + SIGNATURE_IWORK, + salt->salt_sign[0], + salt->salt_sign[1], + FORMAT_NUM_IWORK, + salt->salt_iter + 1, + salt_hex, + iv_hex, + data_hex + ); + + return out_len; +} + +void module_init (module_ctx_t *module_ctx) +{ + module_ctx->module_context_size = MODULE_CONTEXT_SIZE_CURRENT; + module_ctx->module_interface_version = MODULE_INTERFACE_VERSION_CURRENT; + + module_ctx->module_attack_exec = module_attack_exec; + module_ctx->module_benchmark_esalt = MODULE_DEFAULT; + module_ctx->module_benchmark_hook_salt = MODULE_DEFAULT; + module_ctx->module_benchmark_mask = MODULE_DEFAULT; + module_ctx->module_benchmark_salt = MODULE_DEFAULT; + module_ctx->module_build_plain_postprocess = MODULE_DEFAULT; + module_ctx->module_deep_comp_kernel = MODULE_DEFAULT; + module_ctx->module_dgst_pos0 = module_dgst_pos0; + module_ctx->module_dgst_pos1 = module_dgst_pos1; + module_ctx->module_dgst_pos2 = module_dgst_pos2; + module_ctx->module_dgst_pos3 = module_dgst_pos3; + module_ctx->module_dgst_size = module_dgst_size; + module_ctx->module_dictstat_disable = MODULE_DEFAULT; + module_ctx->module_esalt_size = module_esalt_size; + module_ctx->module_extra_buffer_size = MODULE_DEFAULT; + module_ctx->module_extra_tmp_size = MODULE_DEFAULT; + module_ctx->module_forced_outfile_format = MODULE_DEFAULT; + module_ctx->module_hash_binary_count = MODULE_DEFAULT; + module_ctx->module_hash_binary_parse = MODULE_DEFAULT; + module_ctx->module_hash_binary_save = MODULE_DEFAULT; + module_ctx->module_hash_decode_potfile = MODULE_DEFAULT; + module_ctx->module_hash_decode_zero_hash = MODULE_DEFAULT; + module_ctx->module_hash_decode = module_hash_decode; + module_ctx->module_hash_encode_status = MODULE_DEFAULT; + module_ctx->module_hash_encode_potfile = MODULE_DEFAULT; + module_ctx->module_hash_encode = module_hash_encode; + module_ctx->module_hash_init_selftest = MODULE_DEFAULT; + module_ctx->module_hash_mode = MODULE_DEFAULT; + module_ctx->module_hash_category = module_hash_category; + module_ctx->module_hash_name = module_hash_name; + module_ctx->module_hashes_count_min = MODULE_DEFAULT; + module_ctx->module_hashes_count_max = MODULE_DEFAULT; + module_ctx->module_hlfmt_disable = MODULE_DEFAULT; + module_ctx->module_hook12 = MODULE_DEFAULT; + module_ctx->module_hook23 = MODULE_DEFAULT; + module_ctx->module_hook_salt_size = MODULE_DEFAULT; + module_ctx->module_hook_size = MODULE_DEFAULT; + module_ctx->module_jit_build_options = MODULE_DEFAULT; + module_ctx->module_jit_cache_disable = MODULE_DEFAULT; + module_ctx->module_kernel_accel_max = MODULE_DEFAULT; + module_ctx->module_kernel_accel_min = MODULE_DEFAULT; + module_ctx->module_kernel_loops_max = MODULE_DEFAULT; + module_ctx->module_kernel_loops_min = MODULE_DEFAULT; + module_ctx->module_kernel_threads_max = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = MODULE_DEFAULT; + module_ctx->module_kern_type = module_kern_type; + module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; + module_ctx->module_opti_type = module_opti_type; + module_ctx->module_opts_type = module_opts_type; + module_ctx->module_outfile_check_disable = MODULE_DEFAULT; + module_ctx->module_outfile_check_nocomp = MODULE_DEFAULT; + module_ctx->module_potfile_custom_check = MODULE_DEFAULT; + module_ctx->module_potfile_disable = MODULE_DEFAULT; + module_ctx->module_potfile_keep_all_hashes = MODULE_DEFAULT; + module_ctx->module_pwdump_column = MODULE_DEFAULT; + module_ctx->module_pw_max = module_pw_max; + module_ctx->module_pw_min = MODULE_DEFAULT; + module_ctx->module_salt_max = MODULE_DEFAULT; + module_ctx->module_salt_min = MODULE_DEFAULT; + module_ctx->module_salt_type = module_salt_type; + module_ctx->module_separator = MODULE_DEFAULT; + module_ctx->module_st_hash = module_st_hash; + module_ctx->module_st_pass = module_st_pass; + module_ctx->module_tmp_size = module_tmp_size; + module_ctx->module_unstable_warning = MODULE_DEFAULT; + module_ctx->module_warmup_disable = MODULE_DEFAULT; +} diff --git a/tools/test_modules/m23300.pm b/tools/test_modules/m23300.pm new file mode 100644 index 000000000..55278f77b --- /dev/null +++ b/tools/test_modules/m23300.pm @@ -0,0 +1,148 @@ +#!/usr/bin/env perl + +## +## Author......: See docs/credits.txt +## License.....: MIT +## + +use strict; +use warnings; + +use Digest::SHA qw (sha256); +use Crypt::PBKDF2; +use Crypt::CBC; + +sub module_constraints { [[0, 256], [32, 32], [-1, -1], [-1, -1], [-1, -1]] } + +sub module_generate_hash +{ + my $word = shift; + my $salt = shift; + my $hash_ver = shift; + my $file_ver = shift; + my $iter = shift; + my $iv = shift; + my $data = shift; + + my $FORMAT = 1; + + my $is_decrypt = defined ($data); + + if ($is_decrypt == 0) + { + my $type = random_number (1, 2); + + if ($type == 1) + { + $hash_ver = 1; + $file_ver = 2; + + $iter = 100000; + $salt = substr ($salt, 0, 32); # full one + } + else + { + $hash_ver = 2; + $file_ver = 1; + + $iter = 4000; + $salt = substr ($salt, 0, 16); + } + + $salt = pack ("H*", $salt); + + $iv = random_bytes (16); + $data = random_bytes (32); + + $data .= sha256 ($data); + } + + my $pbkdf2 = Crypt::PBKDF2->new + ( + hasher => Crypt::PBKDF2->hasher_from_algorithm ('HMACSHA1'), + iterations => $iter, + output_len => 16, + ); + + my $key = $pbkdf2->PBKDF2 ($salt, $word); + + # AES-CBC + + my $cipher = Crypt::CBC->new ({ + cipher => "Crypt::Rijndael", + key => $key, + iv => $iv, + keysize => 16, + literal_key => 1, + header => "none", + padding => "null" + }); + + if ($is_decrypt == 1) + { + my $hash_data = $data; + + $data = "WRONG"; + + my $decrypted = $cipher->decrypt ($hash_data); + + my $raw_data = substr ($decrypted, 0, 32); + my $checksum = substr ($decrypted, 32, 32); + + my $sha256_of_data = sha256 ($raw_data); + + if ($sha256_of_data eq $checksum) + { + $data = $decrypted; + } + } + + my $encrypted = $cipher->encrypt ($data); + + my $hash = sprintf ("\$iwork\$%i\$%i\$%i\$%i\$%s\$%s\$%s", $hash_ver, $file_ver, $FORMAT, $iter, unpack ("H*", $salt), unpack ("H*", $iv), unpack ("H*", $encrypted)); + + return $hash; +} + +sub module_verify_hash +{ + my $line = shift; + + my $idx = index ($line, ':'); + + return unless $idx >= 0; + + my $hash = substr ($line, 0, $idx); + my $word = substr ($line, $idx + 1); + + return unless substr ($hash, 0, 7) eq '$iwork$'; + + my (undef, undef, $hash_ver, $file_ver, $format, $iter, $salt, $iv, $data) = split '\$', $hash; + + next unless (defined ($hash_ver)); + next unless (defined ($file_ver)); + next unless (defined ($format)); + next unless (defined ($iter)); + next unless (defined ($salt)); + next unless (defined ($iv)); + next unless (defined ($data)); + + next unless (($hash_ver eq '1') or ($hash_ver eq '2')); + next unless (($file_ver eq '1') or ($file_ver eq '2')); + + next unless ($format eq '1'); + + $salt = pack ("H*", $salt); + $iv = pack ("H*", $iv); + $data = pack ("H*", $data); + + $iter = int ($iter); + + my $word_packed = pack_if_HEX_notation ($word); + + my $new_hash = module_generate_hash ($word_packed, $salt, $hash_ver, $file_ver, $iter, $iv, $data); + + return ($new_hash, $word); +} + +1;